xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e7e72b3d0edcd0d15e7f68c03be08666507fc872)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
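
/*
   A minimal usage sketch for MatDistribute_MPIAIJ() (hypothetical driver
   code, not part of this file). gmat is a SeqAIJ matrix that is only
   referenced on rank 0; every rank supplies its local row count m:

       Mat dist;
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
       ... use dist ...
       (later, when only the numerical values of gmat have changed)
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/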
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
180 at a slightly higher hash-table lookup cost; without it, it is not scalable
181 (each process holds an order-N integer array) but access is fast.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
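
/*
   Worked example (hypothetical numbers) of the colmap built above: if the
   off-diagonal block references global columns garray[] = {3,7,12}, then a
   later lookup of global column 7 recovers local column 1:

       PetscInt lcol;
       (with PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,7+1,&lcol);CHKERRQ(ierr);
       lcol--;                          now lcol == 1
       (without PETSC_USE_CTABLE)
       lcol = aij->colmap[7] - 1;       now lcol == 1

   The +1/-1 shift lets a stored or returned value of zero mean "this global
   column does not appear in the off-diagonal part".
*/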
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
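
/*
   Both macros above implement the same sorted-row insertion: a binary search
   over the row's column indices narrowed to a short linear scan, an in-place
   update (= or +=) on a hit, and otherwise a shift-up insertion of the new
   (col,value) pair. Worked example with hypothetical data: for a row with
   rp1 = {2,5,9} and nrow1 = 3, inserting col = 7 exits the scan at _i = 2,
   shifts the entry 9 up one slot, and leaves rp1 = {2,5,7,9}, nrow1 = 4,
   with a->nz incremented by one.
*/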
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
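
/*
   Layout assumed by MatSetValuesRow_MPIAIJ() above: v holds one complete
   local row in ascending global column order, so it splits as

       v = [ off-diagonal entries left of the diagonal block | diagonal block | off-diagonal entries right of it ]

   Hypothetical example: with ownership range [100,200), a row with global
   columns {4,40,120,150,310} is copied as two entries into b->a, two entries
   into a->a, and one entry into b->a.
*/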
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   if (v) PetscValidScalarPointer(v,6);
331   for (i=0; i<m; i++) {
332     if (im[i] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
335 #endif
336     if (im[i] >= rstart && im[i] < rend) {
337       row      = im[i] - rstart;
338       lastcol1 = -1;
339       rp1      = aj + ai[row];
340       ap1      = aa + ai[row];
341       rmax1    = aimax[row];
342       nrow1    = ailen[row];
343       low1     = 0;
344       high1    = nrow1;
345       lastcol2 = -1;
346       rp2      = bj + bi[row];
347       ap2      = ba + bi[row];
348       rmax2    = bimax[row];
349       nrow2    = bilen[row];
350       low2     = 0;
351       high2    = nrow2;
352 
353       for (j=0; j<n; j++) {
354         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
355         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
356         if (in[j] >= cstart && in[j] < cend){
357           col = in[j] - cstart;
358           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
359         } else if (in[j] < 0) continue;
360 #if defined(PETSC_USE_DEBUG)
361         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
362 #endif
363         else {
364           if (mat->was_assembled) {
365             if (!aij->colmap) {
366               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
367             }
368 #if defined (PETSC_USE_CTABLE)
369             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
370 	    col--;
371 #else
372             col = aij->colmap[in[j]] - 1;
373 #endif
374             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
375               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
376               col =  in[j];
377               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
378               B = aij->B;
379               b = (Mat_SeqAIJ*)B->data;
380               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
381               rp2      = bj + bi[row];
382               ap2      = ba + bi[row];
383               rmax2    = bimax[row];
384               nrow2    = bilen[row];
385               low2     = 0;
386               high2    = nrow2;
387               bm       = aij->B->rmap->n;
388               ba = b->a;
389             }
390           } else col = in[j];
391           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
392         }
393       }
394     } else {
395       if (!aij->donotstash) {
396         if (roworiented) {
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         } else {
399           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
400         }
401       }
402     }
403   }
404   PetscFunctionReturn(0);
405 }
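
/*
   The loop above routes each (im[i],in[j]) entry one of three ways: locally
   owned row and column into aij->A; locally owned row with off-process column
   into aij->B (via the colmap once the matrix was assembled); non-local rows
   into the stash, communicated at assembly time. A sketch of the caller-side
   pattern this supports (hypothetical values; row and col are global indices
   that any process may set):

       PetscInt    row = 7, col = 3;
       PetscScalar v   = 42.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/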
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatGetValues_MPIAIJ"
409 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
414   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
415 
416   PetscFunctionBegin;
417   for (i=0; i<m; i++) {
418     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
419     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
420     if (idxm[i] >= rstart && idxm[i] < rend) {
421       row = idxm[i] - rstart;
422       for (j=0; j<n; j++) {
423         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
424         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
425         if (idxn[j] >= cstart && idxn[j] < cend){
426           col = idxn[j] - cstart;
427           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
428         } else {
429           if (!aij->colmap) {
430             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
431           }
432 #if defined (PETSC_USE_CTABLE)
433           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
434           col--;
435 #else
436           col = aij->colmap[idxn[j]] - 1;
437 #endif
438           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
439           else {
440             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
441           }
442         }
443       }
444     } else {
445       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
446     }
447   }
448   PetscFunctionReturn(0);
449 }
450 
451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
456 {
457   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
458   PetscErrorCode ierr;
459   PetscInt       nstash,reallocs;
460   InsertMode     addv;
461 
462   PetscFunctionBegin;
463   if (aij->donotstash) {
464     PetscFunctionReturn(0);
465   }
466 
467   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
468   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
469   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
470   mat->insertmode = addv; /* in case this processor had no cache */
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
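
/*
   The MPI_Allreduce above relies on the InsertMode values being distinct
   bits (INSERT_VALUES == 1, ADD_VALUES == 2 in this version of PETSc), so
   the bitwise OR across ranks equals ADD_VALUES|INSERT_VALUES == 3 exactly
   when the modes were mixed:

       rank 0: insertmode = INSERT_VALUES  (1)
       rank 1: insertmode = ADD_VALUES     (2)
       MPI_BOR reduction  = 3  ->  error raised on every rank
*/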
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive vals belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any processor has disassembled; if so we must
516      also disassemble ourselves, in order that we may reassemble. */
517   /*
518      if the nonzero structure of the submatrix B cannot change then we know
519      that no processor disassembled, so we can skip this step
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
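
/*
   The MPI_PROD reduction in MatAssemblyEnd_MPIAIJ() above works because
   was_assembled is 0 or 1 on each rank: the product over all ranks is 1 only
   if every rank is still assembled. Hypothetical example: ranks report
   {1,1,0,1} -> product 0, so each still-assembled rank calls
   DisAssemble_MPIAIJ() and the whole matrix reassembles consistently.
*/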
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processors of the number of messages and the max length */
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671      Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal entry,
674      which would allocate new memory, triggering many mallocs and slowing
675      things down.
676   */
677   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
678   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
679   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
680     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
681   } else if (diag != 0.0) {
682     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
683     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
684       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
685 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
686     }
687     for (i = 0; i < slen; i++) {
688       row  = lrows[i] + rstart;
689       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
690     }
691     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
692     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693   } else {
694     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
695   }
696   ierr = PetscFree(lrows);CHKERRQ(ierr);
697 
698   /* wait on sends */
699   if (nsends) {
700     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
701     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
702     ierr = PetscFree(send_status);CHKERRQ(ierr);
703   }
704   ierr = PetscFree(send_waits);CHKERRQ(ierr);
705   ierr = PetscFree(svalues);CHKERRQ(ierr);
706 
707   PetscFunctionReturn(0);
708 }
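
/*
   Caller-side sketch for the routine above (hypothetical rows; MatZeroRows()
   is the public entry point, it is collective, and each process passes the
   global indices of the rows it wants zeroed, which may be owned by any
   process):

       PetscInt rows[2] = {0, 5};
       ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);   zero rows 0 and 5, put 1.0 on their diagonals
*/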
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatMult_MPIAIJ"
712 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
713 {
714   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode ierr;
716   PetscInt       nt;
717 
718   PetscFunctionBegin;
719   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
720   if (nt != A->cmap->n) {
721     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
722   }
723   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
724   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
725   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
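
/*
   MatMult_MPIAIJ() above computes, on each process,

       yy = A_d * xx_local + A_o * lvec

   where A_d = a->A is the diagonal block, A_o = a->B the off-diagonal block,
   and lvec holds the scattered ghost values of xx. Starting the scatter
   before the local multiply overlaps communication with computation.
*/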
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
732 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
733 {
734   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatMultAdd_MPIAIJ"
744 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
745 {
746   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
751   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
752   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
753   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
759 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
760 {
761   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
762   PetscErrorCode ierr;
763   PetscTruth     merged;
764 
765   PetscFunctionBegin;
766   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
767   /* do nondiagonal part */
768   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
769   if (!merged) {
770     /* send it on its way */
771     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
772     /* do local part */
773     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
774     /* receive remote parts: note this assumes that the values are not
775        actually added into yy until the VecScatterEnd() below */
776     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
777   } else {
778     /* do local part */
779     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
780     /* send it on its way */
781     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
782     /* values were actually received in the Begin() but we still need to call this no-op End() */
783     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
784   }
785   PetscFunctionReturn(0);
786 }
787 
788 EXTERN_C_BEGIN
789 #undef __FUNCT__
790 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
791 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
792 {
793   MPI_Comm       comm;
794   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
795   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
796   IS             Me,Notme;
797   PetscErrorCode ierr;
798   PetscInt       M,N,first,last,*notme,i;
799   PetscMPIInt    size;
800 
801   PetscFunctionBegin;
802 
803   /* Easy test: symmetric diagonal block */
804   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
805   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
806   if (!*f) PetscFunctionReturn(0);
807   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
808   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
809   if (size == 1) PetscFunctionReturn(0);
810 
811   /* Hard test: off-diagonal block. This requires a MatGetSubMatrices() call. */
812   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
813   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
814   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
815   for (i=0; i<first; i++) notme[i] = i;
816   for (i=last; i<M; i++) notme[i-last+first] = i;
817   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
818   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
819   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
820   Aoff = Aoffs[0];
821   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
822   Boff = Boffs[0];
823   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
824   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
826   ierr = ISDestroy(Me);CHKERRQ(ierr);
827   ierr = ISDestroy(Notme);CHKERRQ(ierr);
828 
829   PetscFunctionReturn(0);
830 }
831 EXTERN_C_END
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
835 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
836 {
837   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
838   PetscErrorCode ierr;
839 
840   PetscFunctionBegin;
841   /* do nondiagonal part */
842   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
843   /* send it on its way */
844   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
845   /* do local part */
846   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
847   /* receive remote parts */
848   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
849   PetscFunctionReturn(0);
850 }
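
/*
   The transpose products above use the reverse scatter: on each process

       zz  = A_d^T * xx + yy                      (local contribution)
       zz += scatter_reverse_add(A_o^T * xx)      (entries owned by other ranks)

   i.e. each process computes its block's contribution to remotely owned
   entries into lvec, and the reverse scatter accumulates them with ADD_VALUES.
*/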
851 
852 /*
853   This only works correctly for square matrices where the subblock A->A is the
854    diagonal block
855 */
856 #undef __FUNCT__
857 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
858 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
859 {
860   PetscErrorCode ierr;
861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
862 
863   PetscFunctionBegin;
864   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
865   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
866   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "MatScale_MPIAIJ"
872 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
873 {
874   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
875   PetscErrorCode ierr;
876 
877   PetscFunctionBegin;
878   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
879   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 #undef __FUNCT__
884 #define __FUNCT__ "MatDestroy_MPIAIJ"
885 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
886 {
887   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
888   PetscErrorCode ierr;
889 
890   PetscFunctionBegin;
891 #if defined(PETSC_USE_LOG)
892   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
893 #endif
894   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
895   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
896   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
897   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
898 #if defined (PETSC_USE_CTABLE)
899   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
900 #else
901   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
902 #endif
903   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
904   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
905   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
906   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
907   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
908   ierr = PetscFree(aij);CHKERRQ(ierr);
909 
910   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatView_MPIAIJ_Binary"
924 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
925 {
926   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
927   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
928   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
929   PetscErrorCode    ierr;
930   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
931   int               fd;
932   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
933   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
934   PetscScalar       *column_values;
935 
936   PetscFunctionBegin;
937   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
938   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
939   nz   = A->nz + B->nz;
940   if (!rank) {
941     header[0] = MAT_FILE_CLASSID;
942     header[1] = mat->rmap->N;
943     header[2] = mat->cmap->N;
944     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
945     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
946     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
947     /* get largest number of rows any processor has */
948     rlen = mat->rmap->n;
949     range = mat->rmap->range;
950     for (i=1; i<size; i++) {
951       rlen = PetscMax(rlen,range[i+1] - range[i]);
952     }
953   } else {
954     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
955     rlen = mat->rmap->n;
956   }
957 
958   /* load up the local row counts */
959   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
960   for (i=0; i<mat->rmap->n; i++) {
961     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
962   }
963 
964   /* store the row lengths to the file */
965   if (!rank) {
966     MPI_Status status;
967     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
968     for (i=1; i<size; i++) {
969       rlen = range[i+1] - range[i];
970       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
971       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
972     }
973   } else {
974     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
975   }
976   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
977 
978   /* load up the local column indices */
979   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
980   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
981   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
982   cnt  = 0;
983   for (i=0; i<mat->rmap->n; i++) {
984     for (j=B->i[i]; j<B->i[i+1]; j++) {
985       if ( (col = garray[B->j[j]]) > cstart) break;
986       column_indices[cnt++] = col;
987     }
988     for (k=A->i[i]; k<A->i[i+1]; k++) {
989       column_indices[cnt++] = A->j[k] + cstart;
990     }
991     for (; j<B->i[i+1]; j++) {
992       column_indices[cnt++] = garray[B->j[j]];
993     }
994   }
995   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
996 
997   /* store the column indices to the file */
998   if (!rank) {
999     MPI_Status status;
1000     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1001     for (i=1; i<size; i++) {
1002       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1003       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1004       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1005       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1006     }
1007   } else {
1008     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1009     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1010   }
1011   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1012 
1013   /* load up the local column values */
1014   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1015   cnt  = 0;
1016   for (i=0; i<mat->rmap->n; i++) {
1017     for (j=B->i[i]; j<B->i[i+1]; j++) {
1018       if ( garray[B->j[j]] > cstart) break;
1019       column_values[cnt++] = B->a[j];
1020     }
1021     for (k=A->i[i]; k<A->i[i+1]; k++) {
1022       column_values[cnt++] = A->a[k];
1023     }
1024     for (; j<B->i[i+1]; j++) {
1025       column_values[cnt++] = B->a[j];
1026     }
1027   }
1028   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1029 
1030   /* store the column values to the file */
1031   if (!rank) {
1032     MPI_Status status;
1033     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1034     for (i=1; i<size; i++) {
1035       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1036       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1037       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1038       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1039     }
1040   } else {
1041     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1042     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1043   }
1044   ierr = PetscFree(column_values);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
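
/*
   Binary layout produced by MatView_MPIAIJ_Binary() above (all data funneled
   through process 0's file descriptor):

       header          4 PetscInts: MAT_FILE_CLASSID, M, N, global nz
       row lengths     M PetscInts, in global row order
       column indices  nz PetscInts, rows in order, each row sorted by global column
       values          nz PetscScalars, in the same ordering as the indices
*/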
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1050 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1051 {
1052   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1053   PetscErrorCode    ierr;
1054   PetscMPIInt       rank = aij->rank,size = aij->size;
1055   PetscTruth        isdraw,iascii,isbinary;
1056   PetscViewer       sviewer;
1057   PetscViewerFormat format;
1058 
1059   PetscFunctionBegin;
1060   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1061   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1062   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1063   if (iascii) {
1064     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1065     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1066       MatInfo    info;
1067       PetscTruth inodes;
1068 
1069       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1070       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1072       if (!inodes) {
1073         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1074 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1075       } else {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1077 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       }
1079       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1080       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1081       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1082       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1083       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1084       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1085       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1086       PetscFunctionReturn(0);
1087     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1088       PetscInt   inodecount,inodelimit,*inodes;
1089       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1090       if (inodes) {
1091         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1092       } else {
1093         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1094       }
1095       PetscFunctionReturn(0);
1096     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1097       PetscFunctionReturn(0);
1098     }
1099   } else if (isbinary) {
1100     if (size == 1) {
1101       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1102       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1103     } else {
1104       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1105     }
1106     PetscFunctionReturn(0);
1107   } else if (isdraw) {
1108     PetscDraw  draw;
1109     PetscTruth isnull;
1110     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1111     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1112   }
1113 
1114   if (size == 1) {
1115     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1116     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1117   } else {
1118     /* assemble the entire matrix onto the first processor */
1119     Mat         A;
1120     Mat_SeqAIJ  *Aloc;
1121     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1122     MatScalar   *a;
1123 
1124     if (mat->rmap->N > 1024) {
1125       PetscTruth flg = PETSC_FALSE;
1126 
1127       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1128       if (!flg) {
1129         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1130       }
1131     }
1132 
1133     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1134     if (!rank) {
1135       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1136     } else {
1137       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1138     }
1139     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1140     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1141     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1142     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1143 
1144     /* copy over the A part */
1145     Aloc = (Mat_SeqAIJ*)aij->A->data;
1146     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1147     row = mat->rmap->rstart;
1148     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1149     for (i=0; i<m; i++) {
1150       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1151       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1152     }
1153     aj = Aloc->j;
1154     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1155 
1156     /* copy over the B part */
1157     Aloc = (Mat_SeqAIJ*)aij->B->data;
1158     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1159     row  = mat->rmap->rstart;
1160     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1161     ct   = cols;
1162     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1163     for (i=0; i<m; i++) {
1164       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1165       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1166     }
1167     ierr = PetscFree(ct);CHKERRQ(ierr);
1168     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1169     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1170     /*
1171        Everyone has to call to draw the matrix since the graphics waits are
1172        synchronized across all processors that share the PetscDraw object
1173     */
1174     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1175     if (!rank) {
1176       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1177       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1178     }
1179     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1180     ierr = MatDestroy(A);CHKERRQ(ierr);
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatView_MPIAIJ"
1187 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1188 {
1189   PetscErrorCode ierr;
1190   PetscTruth     iascii,isdraw,issocket,isbinary;
1191 
1192   PetscFunctionBegin;
1193   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1194   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1195   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1196   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1197   if (iascii || isdraw || isbinary || issocket) {
1198     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1199   } else {
1200     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1201   }
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatSOR_MPIAIJ"
1207 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1208 {
1209   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1210   PetscErrorCode ierr;
1211   Vec            bb1 = 0;
1212   PetscTruth     hasop;
1213 
1214   PetscFunctionBegin;
1215   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1216     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1217   }
1218 
1219   if (flag == SOR_APPLY_UPPER) {
1220     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1221     PetscFunctionReturn(0);
1222   }
1223 
1224   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1225     if (flag & SOR_ZERO_INITIAL_GUESS) {
1226       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1227       its--;
1228     }
1229 
1230     while (its--) {
1231       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233 
1234       /* update rhs: bb1 = bb - B*x */
1235       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1236       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1237 
1238       /* local sweep */
1239       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1240     }
1241   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1242     if (flag & SOR_ZERO_INITIAL_GUESS) {
1243       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1244       its--;
1245     }
1246     while (its--) {
1247       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1249 
1250       /* update rhs: bb1 = bb - B*x */
1251       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1252       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1253 
1254       /* local sweep */
1255       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1256     }
1257   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1258     if (flag & SOR_ZERO_INITIAL_GUESS) {
1259       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1260       its--;
1261     }
1262     while (its--) {
1263       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265 
1266       /* update rhs: bb1 = bb - B*x */
1267       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1268       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1269 
1270       /* local sweep */
1271       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1272     }
1273   }  else if (flag & SOR_EISENSTAT) {
1274     Vec         xx1;
1275 
1276     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1277     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1278 
1279     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1280     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1281     if (!mat->diag) {
1282       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1283       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1284     }
1285     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1286     if (hasop) {
1287       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1288     } else {
1289       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1290     }
1291     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1292 
1293     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1294 
1295     /* local sweep */
1296     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1297     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1298     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1299   } else {
1300     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1301   }
1302 
1303   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1304   PetscFunctionReturn(0);
1305 }
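
/*
   Usage sketch (illustrative only, not part of the library source): the routine
   above is reached through the public MatSOR() interface. Assuming an assembled
   MPIAIJ matrix mat and conforming vectors b and x, two local symmetric sweeps
   with a zero initial guess would be requested as

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   Each outer iteration refreshes the ghost values of x through the Mvctx scatter
   before the next on-process sweep, as coded above.
*/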
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatPermute_MPIAIJ"
1309 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1310 {
1311   MPI_Comm       comm,pcomm;
1312   PetscInt       first,local_size,nrows;
1313   const PetscInt *rows;
1314   PetscMPIInt    size;
1315   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1320   /* make a collective version of 'rowp' */
1321   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1322   if (pcomm==comm) {
1323     crowp = rowp;
1324   } else {
1325     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1326     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1327     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1328     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1329   }
1330   /* collect the global row permutation and invert it */
1331   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1332   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1333   if (pcomm!=comm) {
1334     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1335   }
1336   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1337   /* get the local target indices */
1338   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1339   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1340   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1341   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1342   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1343   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1344   /* the column permutation is simpler because it can be handled locally;
1345      make a local version of 'colp' and invert it */
1346   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1347   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1348   if (size==1) {
1349     lcolp = colp;
1350   } else {
1351     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1352     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1353     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1354   }
1355   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1356   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1357   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1358   if (size>1) {
1359     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1360     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1361   }
1362   /* now we just get the submatrix */
1363   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1364   /* clean up */
1365   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1366   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1367   PetscFunctionReturn(0);
1368 }
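
/*
   Usage sketch (illustrative only): MatPermute() is the public entry point for the
   routine above. Assuming index sets rowp and colp that encode permutations (marked
   with ISSetPermutation()), a permuted copy is obtained with

     Mat Bperm;
     ierr = MatPermute(A,rowp,colp,&Bperm);CHKERRQ(ierr);
*/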
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1372 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1373 {
1374   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1375   Mat            A = mat->A,B = mat->B;
1376   PetscErrorCode ierr;
1377   PetscReal      isend[5],irecv[5];
1378 
1379   PetscFunctionBegin;
1380   info->block_size     = 1.0;
1381   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1382   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1383   isend[3] = info->memory;  isend[4] = info->mallocs;
1384   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1386   isend[3] += info->memory;  isend[4] += info->mallocs;
1387   if (flag == MAT_LOCAL) {
1388     info->nz_used      = isend[0];
1389     info->nz_allocated = isend[1];
1390     info->nz_unneeded  = isend[2];
1391     info->memory       = isend[3];
1392     info->mallocs      = isend[4];
1393   } else if (flag == MAT_GLOBAL_MAX) {
1394     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1395     info->nz_used      = irecv[0];
1396     info->nz_allocated = irecv[1];
1397     info->nz_unneeded  = irecv[2];
1398     info->memory       = irecv[3];
1399     info->mallocs      = irecv[4];
1400   } else if (flag == MAT_GLOBAL_SUM) {
1401     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1402     info->nz_used      = irecv[0];
1403     info->nz_allocated = irecv[1];
1404     info->nz_unneeded  = irecv[2];
1405     info->memory       = irecv[3];
1406     info->mallocs      = irecv[4];
1407   }
1408   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1409   info->fill_ratio_needed = 0;
1410   info->factor_mallocs    = 0;
1411 
1412   PetscFunctionReturn(0);
1413 }
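
/*
   Usage sketch (illustrative only): the reductions above serve MatGetInfo(). For
   example, the total number of stored nonzeros over all processes can be read as

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used: %g\n",info.nz_used);CHKERRQ(ierr);
*/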
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatSetOption_MPIAIJ"
1417 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1418 {
1419   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1420   PetscErrorCode ierr;
1421 
1422   PetscFunctionBegin;
1423   switch (op) {
1424   case MAT_NEW_NONZERO_LOCATIONS:
1425   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1426   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1427   case MAT_KEEP_NONZERO_PATTERN:
1428   case MAT_NEW_NONZERO_LOCATION_ERR:
1429   case MAT_USE_INODES:
1430   case MAT_IGNORE_ZERO_ENTRIES:
1431     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1432     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1433     break;
1434   case MAT_ROW_ORIENTED:
1435     a->roworiented = flg;
1436     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1437     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1438     break;
1439   case MAT_NEW_DIAGONALS:
1440     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1441     break;
1442   case MAT_IGNORE_OFF_PROC_ENTRIES:
1443     a->donotstash = PETSC_TRUE;
1444     break;
1445   case MAT_SYMMETRIC:
1446     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1447     break;
1448   case MAT_STRUCTURALLY_SYMMETRIC:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   case MAT_HERMITIAN:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_SYMMETRY_ETERNAL:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   default:
1458     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1459   }
1460   PetscFunctionReturn(0);
1461 }
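
/*
   Usage sketch (illustrative only): the options handled above are set through the
   public MatSetOption() interface, e.g. to keep the nonzero pattern when rows are
   zeroed:

     ierr = MatSetOption(mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
*/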
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatGetRow_MPIAIJ"
1465 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1466 {
1467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1468   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1469   PetscErrorCode ierr;
1470   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1471   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1472   PetscInt       *cmap,*idx_p;
1473 
1474   PetscFunctionBegin;
1475   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1476   mat->getrowactive = PETSC_TRUE;
1477 
1478   if (!mat->rowvalues && (idx || v)) {
1479     /*
1480         allocate enough space to hold information from the longest row.
1481     */
1482     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1483     PetscInt   max = 1,tmp;
1484     for (i=0; i<matin->rmap->n; i++) {
1485       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1486       if (max < tmp) { max = tmp; }
1487     }
1488     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1489   }
1490 
1491   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1492   lrow = row - rstart;
1493 
1494   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1495   if (!v)   {pvA = 0; pvB = 0;}
1496   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1497   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1498   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1499   nztot = nzA + nzB;
1500 
1501   cmap  = mat->garray;
1502   if (v  || idx) {
1503     if (nztot) {
1504       /* Sort by increasing column numbers, assuming A and B already sorted */
1505       PetscInt imark = -1;
1506       if (v) {
1507         *v = v_p = mat->rowvalues;
1508         for (i=0; i<nzB; i++) {
1509           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1510           else break;
1511         }
1512         imark = i;
1513         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1514         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1515       }
1516       if (idx) {
1517         *idx = idx_p = mat->rowindices;
1518         if (imark > -1) {
1519           for (i=0; i<imark; i++) {
1520             idx_p[i] = cmap[cworkB[i]];
1521           }
1522         } else {
1523           for (i=0; i<nzB; i++) {
1524             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1525             else break;
1526           }
1527           imark = i;
1528         }
1529         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1530         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1531       }
1532     } else {
1533       if (idx) *idx = 0;
1534       if (v)   *v   = 0;
1535     }
1536   }
1537   *nz = nztot;
1538   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1539   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1545 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1546 {
1547   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1548 
1549   PetscFunctionBegin;
1550   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1551   aij->getrowactive = PETSC_FALSE;
1552   PetscFunctionReturn(0);
1553 }
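
/*
   Usage sketch (illustrative only): MatGetRow()/MatRestoreRow() must be paired and
   may only touch locally owned rows, as enforced above. A typical traversal:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... examine cols[] and vals[] ... */
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/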
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatNorm_MPIAIJ"
1557 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1558 {
1559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1560   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1561   PetscErrorCode ierr;
1562   PetscInt       i,j,cstart = mat->cmap->rstart;
1563   PetscReal      sum = 0.0;
1564   MatScalar      *v;
1565 
1566   PetscFunctionBegin;
1567   if (aij->size == 1) {
1568     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1569   } else {
1570     if (type == NORM_FROBENIUS) {
1571       v = amat->a;
1572       for (i=0; i<amat->nz; i++) {
1573 #if defined(PETSC_USE_COMPLEX)
1574         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1575 #else
1576         sum += (*v)*(*v); v++;
1577 #endif
1578       }
1579       v = bmat->a;
1580       for (i=0; i<bmat->nz; i++) {
1581 #if defined(PETSC_USE_COMPLEX)
1582         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1583 #else
1584         sum += (*v)*(*v); v++;
1585 #endif
1586       }
1587       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1588       *norm = sqrt(*norm);
1589     } else if (type == NORM_1) { /* max column norm */
1590       PetscReal *tmp,*tmp2;
1591       PetscInt  *jj,*garray = aij->garray;
1592       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1593       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1594       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1595       *norm = 0.0;
1596       v = amat->a; jj = amat->j;
1597       for (j=0; j<amat->nz; j++) {
1598         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1599       }
1600       v = bmat->a; jj = bmat->j;
1601       for (j=0; j<bmat->nz; j++) {
1602         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1603       }
1604       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1605       for (j=0; j<mat->cmap->N; j++) {
1606         if (tmp2[j] > *norm) *norm = tmp2[j];
1607       }
1608       ierr = PetscFree(tmp);CHKERRQ(ierr);
1609       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1610     } else if (type == NORM_INFINITY) { /* max row norm */
1611       PetscReal ntemp = 0.0;
1612       for (j=0; j<aij->A->rmap->n; j++) {
1613         v = amat->a + amat->i[j];
1614         sum = 0.0;
1615         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1616           sum += PetscAbsScalar(*v); v++;
1617         }
1618         v = bmat->a + bmat->i[j];
1619         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1620           sum += PetscAbsScalar(*v); v++;
1621         }
1622         if (sum > ntemp) ntemp = sum;
1623       }
1624       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1625     } else {
1626       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1627     }
1628   }
1629   PetscFunctionReturn(0);
1630 }
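
/*
   Usage sketch (illustrative only): the three supported norms are selected through
   MatNorm(); NORM_2 is rejected above because the spectral norm is not computed.

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/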
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatTranspose_MPIAIJ"
1634 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1635 {
1636   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1637   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1638   PetscErrorCode ierr;
1639   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1640   PetscInt       cstart=A->cmap->rstart,ncol;
1641   Mat            B;
1642   MatScalar      *array;
1643 
1644   PetscFunctionBegin;
1645   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1646 
1647   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1648   ai = Aloc->i; aj = Aloc->j;
1649   bi = Bloc->i; bj = Bloc->j;
1650   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1651     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1652     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1653     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1654     for (i=0; i<ai[ma]; i++){
1655       d_nnz[aj[i]] ++;
1656       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1657     }
1658 
1659     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1660     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1661     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1662     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1663     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1664   } else {
1665     B = *matout;
1666   }
1667 
1668   /* copy over the A part */
1669   array = Aloc->a;
1670   row = A->rmap->rstart;
1671   for (i=0; i<ma; i++) {
1672     ncol = ai[i+1]-ai[i];
1673     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1674     row++; array += ncol; aj += ncol;
1675   }
1676   aj = Aloc->j;
1677   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
1678 
1679   /* copy over the B part */
1680   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1681   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1682   array = Bloc->a;
1683   row = A->rmap->rstart;
1684   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1685   cols_tmp = cols;
1686   for (i=0; i<mb; i++) {
1687     ncol = bi[i+1]-bi[i];
1688     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1689     row++; array += ncol; cols_tmp += ncol;
1690   }
1691   ierr = PetscFree(cols);CHKERRQ(ierr);
1692 
1693   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1696     *matout = B;
1697   } else {
1698     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1699   }
1700   PetscFunctionReturn(0);
1701 }
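
/*
   Usage sketch (illustrative only): both transpose modes route through the routine
   above; in-place reuse is restricted to square matrices by the size check.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* new matrix */
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in place, square A only */
*/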
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1705 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1706 {
1707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1708   Mat            a = aij->A,b = aij->B;
1709   PetscErrorCode ierr;
1710   PetscInt       s1,s2,s3;
1711 
1712   PetscFunctionBegin;
1713   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1714   if (rr) {
1715     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1716     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1717     /* Overlap communication with computation. */
1718     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1719   }
1720   if (ll) {
1721     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1722     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1723     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1724   }
1725   /* scale the diagonal block */
1726   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1727 
1728   if (rr) {
1729     /* Do a scatter end and then right scale the off-diagonal block */
1730     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1731     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1732   }
1733 
1734   PetscFunctionReturn(0);
1735 }
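
/*
   Usage sketch (illustrative only): the routine above computes mat = diag(ll)*mat*diag(rr),
   overlapping the scatter of rr with the scaling of the diagonal block. Either vector
   may be PETSC_NULL to scale on one side only:

     ierr = MatDiagonalScale(mat,l,PETSC_NULL);CHKERRQ(ierr); /* row scaling only */
*/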
1736 
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1739 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1740 {
1741   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1742   PetscErrorCode ierr;
1743 
1744   PetscFunctionBegin;
1745   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1746   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1747   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1748   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1749   PetscFunctionReturn(0);
1750 }
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1753 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1754 {
1755   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1756   PetscErrorCode ierr;
1757 
1758   PetscFunctionBegin;
1759   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatEqual_MPIAIJ"
1765 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1766 {
1767   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1768   Mat            a,b,c,d;
1769   PetscTruth     flg;
1770   PetscErrorCode ierr;
1771 
1772   PetscFunctionBegin;
1773   a = matA->A; b = matA->B;
1774   c = matB->A; d = matB->B;
1775 
1776   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1777   if (flg) {
1778     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1779   }
1780   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatCopy_MPIAIJ"
1786 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1787 {
1788   PetscErrorCode ierr;
1789   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1790   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1791 
1792   PetscFunctionBegin;
1793   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1794   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1795     /* because of the column compression in the off-processor part of the matrix a->B,
1796        the number of columns in a->B and b->B may be different, hence we cannot call
1797        MatCopy() directly on the two parts. If need be, a copy more efficient than
1798        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1799        and then copying the submatrices */
1800     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1801   } else {
1802     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1803     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1804   }
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1810 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1811 {
1812   PetscErrorCode ierr;
1813 
1814   PetscFunctionBegin;
1815   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatAXPY_MPIAIJ"
1821 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1822 {
1823   PetscErrorCode ierr;
1824   PetscInt       i;
1825   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1826   PetscBLASInt   bnz,one=1;
1827   Mat_SeqAIJ     *x,*y;
1828 
1829   PetscFunctionBegin;
1830   if (str == SAME_NONZERO_PATTERN) {
1831     PetscScalar alpha = a;
1832     x = (Mat_SeqAIJ *)xx->A->data;
1833     y = (Mat_SeqAIJ *)yy->A->data;
1834     bnz = PetscBLASIntCast(x->nz);
1835     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1836     x = (Mat_SeqAIJ *)xx->B->data;
1837     y = (Mat_SeqAIJ *)yy->B->data;
1838     bnz = PetscBLASIntCast(x->nz);
1839     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1840   } else if (str == SUBSET_NONZERO_PATTERN) {
1841     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1842 
1843     x = (Mat_SeqAIJ *)xx->B->data;
1844     y = (Mat_SeqAIJ *)yy->B->data;
1845     if (y->xtoy && y->XtoY != xx->B) {
1846       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1847       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1848     }
1849     if (!y->xtoy) { /* get xtoy */
1850       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1851       y->XtoY = xx->B;
1852       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1853     }
1854     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1855   } else {
1856     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1857   }
1858   PetscFunctionReturn(0);
1859 }
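
/*
   Usage sketch (illustrative only): Y = a*X + Y through the public interface; the
   SAME_NONZERO_PATTERN path above reduces to two BLAS axpy calls on the stored values.

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/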
1860 
1861 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatConjugate_MPIAIJ"
1865 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1866 {
1867 #if defined(PETSC_USE_COMPLEX)
1868   PetscErrorCode ierr;
1869   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1870 
1871   PetscFunctionBegin;
1872   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1873   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1874 #else
1875   PetscFunctionBegin;
1876 #endif
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatRealPart_MPIAIJ"
1882 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1883 {
1884   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1885   PetscErrorCode ierr;
1886 
1887   PetscFunctionBegin;
1888   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1889   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1895 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1896 {
1897   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1898   PetscErrorCode ierr;
1899 
1900   PetscFunctionBegin;
1901   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1902   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #ifdef PETSC_HAVE_PBGL
1907 
1908 #include <boost/parallel/mpi/bsp_process_group.hpp>
1909 #include <boost/graph/distributed/ilu_default_graph.hpp>
1910 #include <boost/graph/distributed/ilu_0_block.hpp>
1911 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1912 #include <boost/graph/distributed/petsc/interface.hpp>
1913 #include <boost/multi_array.hpp>
1914 #include <boost/parallel/distributed_property_map.hpp>
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1918 /*
1919   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1920 */
1921 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1922 {
1923   namespace petsc = boost::distributed::petsc;
1924 
1925   namespace graph_dist = boost::graph::distributed;
1926   using boost::graph::distributed::ilu_default::process_group_type;
1927   using boost::graph::ilu_permuted;
1928 
1929   PetscTruth      row_identity, col_identity;
1930   PetscContainer  c;
1931   PetscInt        m, n, M, N;
1932   PetscErrorCode  ierr;
1933 
1934   PetscFunctionBegin;
1935   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1936   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1937   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1938   if (!row_identity || !col_identity) {
1939     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1940   }
1941 
1942   process_group_type pg;
1943   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1944   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1945   lgraph_type&   level_graph = *lgraph_p;
1946   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1947 
1948   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1949   ilu_permuted(level_graph);
1950 
1951   /* put together the new matrix */
1952   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1953   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1954   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1955   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1956   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1957   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1959 
1960   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1961   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1962   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1968 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1969 {
1970   PetscFunctionBegin;
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatSolve_MPIAIJ"
1976 /*
1977   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1978 */
1979 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1980 {
1981   namespace graph_dist = boost::graph::distributed;
1982 
1983   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1984   lgraph_type*   lgraph_p;
1985   PetscContainer c;
1986   PetscErrorCode ierr;
1987 
1988   PetscFunctionBegin;
1989   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1990   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1991   ierr = VecCopy(b, x);CHKERRQ(ierr);
1992 
1993   PetscScalar* array_x;
1994   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1995   PetscInt sx;
1996   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1997 
1998   PetscScalar* array_b;
1999   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2000   PetscInt sb;
2001   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2002 
2003   lgraph_type&   level_graph = *lgraph_p;
2004   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2005 
2006   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2007   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2008                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2009 
2010   typedef boost::iterator_property_map<array_ref_type::iterator,
2011                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2012   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2013                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2014 
2015   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2016 
2017   PetscFunctionReturn(0);
2018 }
2019 #endif
2020 
2021 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2022   PetscInt       nzlocal,nsends,nrecvs;
2023   PetscMPIInt    *send_rank,*recv_rank;
2024   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2025   PetscScalar    *sbuf_a,**rbuf_a;
2026   PetscErrorCode (*MatDestroy)(Mat);
2027 } Mat_Redundant;
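
/*
   The buffers above are attached to the redundant matrix through a PetscContainer so
   that a later call with MAT_REUSE_MATRIX can recover them. A minimal sketch of the
   attach/query pattern used below:

     PetscContainer container;
     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
     ...
     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
*/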
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2031 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2032 {
2033   PetscErrorCode       ierr;
2034   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2035   PetscInt             i;
2036 
2037   PetscFunctionBegin;
2038   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2039   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2040   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2041   for (i=0; i<redund->nrecvs; i++){
2042     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2043     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2044   }
2045   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2046   ierr = PetscFree(redund);CHKERRQ(ierr);
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 #undef __FUNCT__
2051 #define __FUNCT__ "MatDestroy_MatRedundant"
2052 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2053 {
2054   PetscErrorCode  ierr;
2055   PetscContainer  container;
2056   Mat_Redundant   *redund=PETSC_NULL;
2057 
2058   PetscFunctionBegin;
2059   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2060   if (container) {
2061     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2062   } else {
2063     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2064   }
2065   A->ops->destroy = redund->MatDestroy;
2066   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2067   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2068   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2074 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2075 {
2076   PetscMPIInt    rank,size;
2077   MPI_Comm       comm=((PetscObject)mat)->comm;
2078   PetscErrorCode ierr;
2079   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2080   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2081   PetscInt       *rowrange=mat->rmap->range;
2082   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2083   Mat            A=aij->A,B=aij->B,C=*matredundant;
2084   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2085   PetscScalar    *sbuf_a;
2086   PetscInt       nzlocal=a->nz+b->nz;
2087   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2088   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2089   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2090   MatScalar      *aworkA,*aworkB;
2091   PetscScalar    *vals;
2092   PetscMPIInt    tag1,tag2,tag3,imdex;
2093   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2094                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2095   MPI_Status     recv_status,*send_status;
2096   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2097   PetscInt       **rbuf_j=PETSC_NULL;
2098   PetscScalar    **rbuf_a=PETSC_NULL;
2099   Mat_Redundant  *redund=PETSC_NULL;
2100   PetscContainer container;
2101 
2102   PetscFunctionBegin;
2103   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2105 
2106   if (reuse == MAT_REUSE_MATRIX) {
2107     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2108     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2109     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2110     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2111     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2112     if (container) {
2113       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2114     } else {
2115       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2116     }
2117     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2118 
2119     nsends    = redund->nsends;
2120     nrecvs    = redund->nrecvs;
2121     send_rank = redund->send_rank;
2122     recv_rank = redund->recv_rank;
2123     sbuf_nz   = redund->sbuf_nz;
2124     rbuf_nz   = redund->rbuf_nz;
2125     sbuf_j    = redund->sbuf_j;
2126     sbuf_a    = redund->sbuf_a;
2127     rbuf_j    = redund->rbuf_j;
2128     rbuf_a    = redund->rbuf_a;
2129   }
2130 
2131   if (reuse == MAT_INITIAL_MATRIX){
2132     PetscMPIInt  subrank,subsize;
2133     PetscInt     nleftover,np_subcomm;
2134     /* get the destination processors' id send_rank, nsends and nrecvs */
2135     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2136     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2137     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2138     np_subcomm = size/nsubcomm;
2139     nleftover  = size - nsubcomm*np_subcomm;
2140     nsends = 0; nrecvs = 0;
2141     for (i=0; i<size; i++){ /* i loops over all ranks in comm */
2142       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2143         send_rank[nsends] = i; nsends++;
2144         recv_rank[nrecvs++] = i;
2145       }
2146     }
2147     if (rank >= size - nleftover){/* this proc is a leftover processor */
2148       i = size-nleftover-1;
2149       j = 0;
2150       while (j < nsubcomm - nleftover){
2151         send_rank[nsends++] = i;
2152         i--; j++;
2153       }
2154     }
2155 
2156     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2157       for (i=0; i<nleftover; i++){
2158         recv_rank[nrecvs++] = size-nleftover+i;
2159       }
2160     }
2161 
2162     /* allocate sbuf_j, sbuf_a */
2163     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2164     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2165     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2166   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2167 
2168   /* copy mat's local entries into the buffers */
2169   if (reuse == MAT_INITIAL_MATRIX){
2170     rownz_max = 0;
2171     rptr = sbuf_j;
2172     cols = sbuf_j + rend-rstart + 1;
2173     vals = sbuf_a;
2174     rptr[0] = 0;
2175     for (i=0; i<rend-rstart; i++){
2176       row = i + rstart;
2177       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2178       ncols  = nzA + nzB;
2179       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2180       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2181       /* load the column indices for this row into cols */
2182       lwrite = 0;
2183       for (l=0; l<nzB; l++) {
2184         if ((ctmp = bmap[cworkB[l]]) < cstart){
2185           vals[lwrite]   = aworkB[l];
2186           cols[lwrite++] = ctmp;
2187         }
2188       }
2189       for (l=0; l<nzA; l++){
2190         vals[lwrite]   = aworkA[l];
2191         cols[lwrite++] = cstart + cworkA[l];
2192       }
2193       for (l=0; l<nzB; l++) {
2194         if ((ctmp = bmap[cworkB[l]]) >= cend){
2195           vals[lwrite]   = aworkB[l];
2196           cols[lwrite++] = ctmp;
2197         }
2198       }
2199       vals += ncols;
2200       cols += ncols;
2201       rptr[i+1] = rptr[i] + ncols;
2202       if (rownz_max < ncols) rownz_max = ncols;
2203     }
2204     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2205   } else { /* only copy matrix values into sbuf_a */
2206     rptr = sbuf_j;
2207     vals = sbuf_a;
2208     rptr[0] = 0;
2209     for (i=0; i<rend-rstart; i++){
2210       row = i + rstart;
2211       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2212       ncols  = nzA + nzB;
2213       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2214       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2215       lwrite = 0;
2216       for (l=0; l<nzB; l++) {
2217         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2218       }
2219       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2220       for (l=0; l<nzB; l++) {
2221         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2222       }
2223       vals += ncols;
2224       rptr[i+1] = rptr[i] + ncols;
2225     }
2226   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2227 
2228   /* send nzlocal to others, and recv other's nzlocal */
2229   /*--------------------------------------------------*/
2230   if (reuse == MAT_INITIAL_MATRIX){
2231     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2232     s_waits2 = s_waits3 + nsends;
2233     s_waits1 = s_waits2 + nsends;
2234     r_waits1 = s_waits1 + nsends;
2235     r_waits2 = r_waits1 + nrecvs;
2236     r_waits3 = r_waits2 + nrecvs;
2237   } else {
2238     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2239     r_waits3 = s_waits3 + nsends;
2240   }
2241 
2242   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2243   if (reuse == MAT_INITIAL_MATRIX){
2244     /* get new tags to keep the communication clean */
2245     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2246     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2247     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2248 
2249     /* post receives of other's nzlocal */
2250     for (i=0; i<nrecvs; i++){
2251       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2252     }
2253     /* send nzlocal to others */
2254     for (i=0; i<nsends; i++){
2255       sbuf_nz[i] = nzlocal;
2256       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2257     }
2258     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2259     count = nrecvs;
2260     while (count) {
2261       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2262       recv_rank[imdex] = recv_status.MPI_SOURCE;
2263       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2264       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2265 
2266       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2267       rbuf_nz[imdex] += i + 2;
2268       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2269       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2270       count--;
2271     }
2272     /* wait on sends of nzlocal */
2273     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2274     /* send mat->i,j to others, and recv from other's */
2275     /*------------------------------------------------*/
2276     for (i=0; i<nsends; i++){
2277       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2278       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2279     }
2280     /* wait on receives of mat->i,j */
2281     /*------------------------------*/
2282     count = nrecvs;
2283     while (count) {
2284       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2285       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2286       count--;
2287     }
2288     /* wait on sends of mat->i,j */
2289     /*---------------------------*/
2290     if (nsends) {
2291       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2292     }
2293   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2294 
2295   /* post receives, send and receive mat->a */
2296   /*----------------------------------------*/
2297   for (imdex=0; imdex<nrecvs; imdex++) {
2298     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2299   }
2300   for (i=0; i<nsends; i++){
2301     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2302   }
2303   count = nrecvs;
2304   while (count) {
2305     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2306     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2307     count--;
2308   }
2309   if (nsends) {
2310     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2311   }
2312 
2313   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2314 
2315   /* create redundant matrix */
2316   /*-------------------------*/
2317   if (reuse == MAT_INITIAL_MATRIX){
2318     /* compute rownz_max for preallocation */
2319     for (imdex=0; imdex<nrecvs; imdex++){
2320       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2321       rptr = rbuf_j[imdex];
2322       for (i=0; i<j; i++){
2323         ncols = rptr[i+1] - rptr[i];
2324         if (rownz_max < ncols) rownz_max = ncols;
2325       }
2326     }
2327 
2328     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2329     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2330     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2331     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2332     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2333   } else {
2334     C = *matredundant;
2335   }
2336 
2337   /* insert local matrix entries */
2338   rptr = sbuf_j;
2339   cols = sbuf_j + rend-rstart + 1;
2340   vals = sbuf_a;
2341   for (i=0; i<rend-rstart; i++){
2342     row   = i + rstart;
2343     ncols = rptr[i+1] - rptr[i];
2344     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2345     vals += ncols;
2346     cols += ncols;
2347   }
2348   /* insert received matrix entries */
2349   for (imdex=0; imdex<nrecvs; imdex++){
2350     rstart = rowrange[recv_rank[imdex]];
2351     rend   = rowrange[recv_rank[imdex]+1];
2352     rptr = rbuf_j[imdex];
2353     cols = rbuf_j[imdex] + rend-rstart + 1;
2354     vals = rbuf_a[imdex];
2355     for (i=0; i<rend-rstart; i++){
2356       row   = i + rstart;
2357       ncols = rptr[i+1] - rptr[i];
2358       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2359       vals += ncols;
2360       cols += ncols;
2361     }
2362   }
2363   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2364   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2366   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2367   if (reuse == MAT_INITIAL_MATRIX){
2368     PetscContainer container;
2369     *matredundant = C;
2370     /* create a supporting struct and attach it to C for reuse */
2371     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2372     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2373     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2374     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2375     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2376 
2377     redund->nzlocal = nzlocal;
2378     redund->nsends  = nsends;
2379     redund->nrecvs  = nrecvs;
2380     redund->send_rank = send_rank;
2381     redund->recv_rank = recv_rank;
2382     redund->sbuf_nz = sbuf_nz;
2383     redund->rbuf_nz = rbuf_nz;
2384     redund->sbuf_j  = sbuf_j;
2385     redund->sbuf_a  = sbuf_a;
2386     redund->rbuf_j  = rbuf_j;
2387     redund->rbuf_a  = rbuf_a;
2388 
2389     redund->MatDestroy = C->ops->destroy;
2390     C->ops->destroy    = MatDestroy_MatRedundant;
2391   }
2392   PetscFunctionReturn(0);
2393 }
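
/*
   Usage sketch (illustrative only): assuming the caller has already formed the
   subcommunicator subcomm and chosen nsubcomm and the local size mlocal_sub, each
   subcommunicator receives a complete copy of mat:

     Mat Cred;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
*/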
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2397 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2398 {
2399   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2400   PetscErrorCode ierr;
2401   PetscInt       i,*idxb = 0;
2402   PetscScalar    *va,*vb;
2403   Vec            vtmp;
2404 
2405   PetscFunctionBegin;
2406   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2407   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2408   if (idx) {
2409     for (i=0; i<A->rmap->n; i++) {
2410       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2411     }
2412   }
2413 
2414   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2415   if (idx) {
2416     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2417   }
2418   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2419   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2420 
2421   for (i=0; i<A->rmap->n; i++){
2422     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2423       va[i] = vb[i];
2424       if (idx) idx[i] = a->garray[idxb[i]];
2425     }
2426   }
2427 
2428   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2429   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2430   if (idxb) {
2431     ierr = PetscFree(idxb);CHKERRQ(ierr);
2432   }
2433   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
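
/*
   Usage sketch (illustrative only): v receives, for each local row, the entry of
   largest magnitude; idx (optional, may be PETSC_NULL) receives its global column.

     Vec      v;
     PetscInt m,*idx;
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr); /* vector conforming to the rows */
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/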
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2439 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2440 {
2441   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2442   PetscErrorCode ierr;
2443   PetscInt       i,*idxb = 0;
2444   PetscScalar    *va,*vb;
2445   Vec            vtmp;
2446 
2447   PetscFunctionBegin;
2448   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2450   if (idx) {
2451     for (i=0; i<A->rmap->n; i++) {
2452       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2453     }
2454   }
2455 
2456   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2457   if (idx) {
2458     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2459   }
2460   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2461   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2462 
2463   for (i=0; i<A->rmap->n; i++){
2464     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2465       va[i] = vb[i];
2466       if (idx) idx[i] = a->garray[idxb[i]];
2467     }
2468   }
2469 
2470   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2471   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2472   if (idxb) {
2473     ierr = PetscFree(idxb);CHKERRQ(ierr);
2474   }
2475   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 #undef __FUNCT__
2480 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2481 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2482 {
2483   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2484   PetscInt       n      = A->rmap->n;
2485   PetscInt       cstart = A->cmap->rstart;
2486   PetscInt      *cmap   = mat->garray;
2487   PetscInt      *diagIdx, *offdiagIdx;
2488   Vec            diagV, offdiagV;
2489   PetscScalar   *a, *diagA, *offdiagA;
2490   PetscInt       r;
2491   PetscErrorCode ierr;
2492 
2493   PetscFunctionBegin;
2494   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2495   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2496   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2497   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2498   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2499   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2500   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2501   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2502   for(r = 0; r < n; ++r) {
2503     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2504       a[r]   = diagA[r];
2505       idx[r] = cstart + diagIdx[r];
2506     } else {
2507       a[r]   = offdiagA[r];
2508       idx[r] = cmap[offdiagIdx[r]];
2509     }
2510   }
2511   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2512   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2513   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2514   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2515   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2516   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2522 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2523 {
2524   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2525   PetscInt       n      = A->rmap->n;
2526   PetscInt       cstart = A->cmap->rstart;
2527   PetscInt      *cmap   = mat->garray;
2528   PetscInt      *diagIdx, *offdiagIdx;
2529   Vec            diagV, offdiagV;
2530   PetscScalar   *a, *diagA, *offdiagA;
2531   PetscInt       r;
2532   PetscErrorCode ierr;
2533 
2534   PetscFunctionBegin;
2535   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2536   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2537   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2538   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2539   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2540   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2541   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2542   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2543   for(r = 0; r < n; ++r) {
2544     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2545       a[r]   = diagA[r];
2546       idx[r] = cstart + diagIdx[r];
2547     } else {
2548       a[r]   = offdiagA[r];
2549       idx[r] = cmap[offdiagIdx[r]];
2550     }
2551   }
2552   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2553   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2554   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2555   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2556   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2557   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2563 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2564 {
2565   PetscErrorCode ierr;
2566   Mat            *dummy;
2567 
2568   PetscFunctionBegin;
2569   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2570   *newmat = *dummy;
2571   ierr = PetscFree(dummy);CHKERRQ(ierr);
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2576 /* -------------------------------------------------------------------*/
2577 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2578        MatGetRow_MPIAIJ,
2579        MatRestoreRow_MPIAIJ,
2580        MatMult_MPIAIJ,
2581 /* 4*/ MatMultAdd_MPIAIJ,
2582        MatMultTranspose_MPIAIJ,
2583        MatMultTransposeAdd_MPIAIJ,
2584 #ifdef PETSC_HAVE_PBGL
2585        MatSolve_MPIAIJ,
2586 #else
2587        0,
2588 #endif
2589        0,
2590        0,
2591 /*10*/ 0,
2592        0,
2593        0,
2594        MatSOR_MPIAIJ,
2595        MatTranspose_MPIAIJ,
2596 /*15*/ MatGetInfo_MPIAIJ,
2597        MatEqual_MPIAIJ,
2598        MatGetDiagonal_MPIAIJ,
2599        MatDiagonalScale_MPIAIJ,
2600        MatNorm_MPIAIJ,
2601 /*20*/ MatAssemblyBegin_MPIAIJ,
2602        MatAssemblyEnd_MPIAIJ,
2603        MatSetOption_MPIAIJ,
2604        MatZeroEntries_MPIAIJ,
2605 /*24*/ MatZeroRows_MPIAIJ,
2606        0,
2607 #ifdef PETSC_HAVE_PBGL
2608        0,
2609 #else
2610        0,
2611 #endif
2612        0,
2613        0,
2614 /*29*/ MatSetUpPreallocation_MPIAIJ,
2615 #ifdef PETSC_HAVE_PBGL
2616        0,
2617 #else
2618        0,
2619 #endif
2620        0,
2621        0,
2622        0,
2623 /*34*/ MatDuplicate_MPIAIJ,
2624        0,
2625        0,
2626        0,
2627        0,
2628 /*39*/ MatAXPY_MPIAIJ,
2629        MatGetSubMatrices_MPIAIJ,
2630        MatIncreaseOverlap_MPIAIJ,
2631        MatGetValues_MPIAIJ,
2632        MatCopy_MPIAIJ,
2633 /*44*/ MatGetRowMax_MPIAIJ,
2634        MatScale_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638 /*49*/ MatSetBlockSize_MPIAIJ,
2639        0,
2640        0,
2641        0,
2642        0,
2643 /*54*/ MatFDColoringCreate_MPIAIJ,
2644        0,
2645        MatSetUnfactored_MPIAIJ,
2646        MatPermute_MPIAIJ,
2647        0,
2648 /*59*/ MatGetSubMatrix_MPIAIJ,
2649        MatDestroy_MPIAIJ,
2650        MatView_MPIAIJ,
2651        0,
2652        0,
2653 /*64*/ 0,
2654        0,
2655        0,
2656        0,
2657        0,
2658 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2659        MatGetRowMinAbs_MPIAIJ,
2660        0,
2661        MatSetColoring_MPIAIJ,
2662 #if defined(PETSC_HAVE_ADIC)
2663        MatSetValuesAdic_MPIAIJ,
2664 #else
2665        0,
2666 #endif
2667        MatSetValuesAdifor_MPIAIJ,
2668 /*75*/ MatFDColoringApply_AIJ,
2669        0,
2670        0,
2671        0,
2672        0,
2673 /*80*/ 0,
2674        0,
2675        0,
2676 /*83*/ MatLoad_MPIAIJ,
2677        0,
2678        0,
2679        0,
2680        0,
2681        0,
2682 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2683        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2684        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2685        MatPtAP_Basic,
2686        MatPtAPSymbolic_MPIAIJ,
2687 /*94*/ MatPtAPNumeric_MPIAIJ,
2688        0,
2689        0,
2690        0,
2691        0,
2692 /*99*/ 0,
2693        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2694        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2695        MatConjugate_MPIAIJ,
2696        0,
2697 /*104*/MatSetValuesRow_MPIAIJ,
2698        MatRealPart_MPIAIJ,
2699        MatImaginaryPart_MPIAIJ,
2700        0,
2701        0,
2702 /*109*/0,
2703        MatGetRedundantMatrix_MPIAIJ,
2704        MatGetRowMin_MPIAIJ,
2705        0,
2706        0,
2707 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2708        0,
2709        0,
2710        0,
2711        0,
2712        0
2713 };
2714 
2715 /* ----------------------------------------------------------------------------------------*/
2716 
2717 EXTERN_C_BEGIN
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2720 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2721 {
2722   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2723   PetscErrorCode ierr;
2724 
2725   PetscFunctionBegin;
2726   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2727   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2728   PetscFunctionReturn(0);
2729 }
2730 EXTERN_C_END
2731 
2732 EXTERN_C_BEGIN
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2735 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2736 {
2737   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2738   PetscErrorCode ierr;
2739 
2740   PetscFunctionBegin;
2741   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2742   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 EXTERN_C_END
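
/*
   Usage sketch for the store/retrieve pair above (illustrative only, not part
   of the library source; the surrounding setup is assumed). The intended
   pattern freezes the nonzero structure, saves the numerical values, and
   restores them later, for example between solves that reuse the sparsity:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);     <- dispatches to MatStoreValues_MPIAIJ()
         ... modify the entries of mat in place ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);  <- values restored, structure untouched
*/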
2746 
2747 #include "petscpc.h"
2748 EXTERN_C_BEGIN
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2752 {
2753   Mat_MPIAIJ     *b;
2754   PetscErrorCode ierr;
2755   PetscInt       i;
2756 
2757   PetscFunctionBegin;
2758   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2759   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2760   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2761   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2762 
2763   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2765   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2766   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2767   if (d_nnz) {
2768     for (i=0; i<B->rmap->n; i++) {
2769       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2770     }
2771   }
2772   if (o_nnz) {
2773     for (i=0; i<B->rmap->n; i++) {
2774       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2775     }
2776   }
2777   b = (Mat_MPIAIJ*)B->data;
2778 
2779   if (!B->preallocated) {
2780     /* Explicitly create 2 MATSEQAIJ matrices. */
2781     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2782     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2783     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2784     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2785     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2786     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2787     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2788     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2789   }
2790 
2791   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2792   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2793   B->preallocated = PETSC_TRUE;
2794   PetscFunctionReturn(0);
2795 }
2796 EXTERN_C_END
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2800 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2801 {
2802   Mat            mat;
2803   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   *newmat       = 0;
2808   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2809   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2810   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2811   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2812   a    = (Mat_MPIAIJ*)mat->data;
2813 
2814   mat->factortype   = matin->factortype;
2815   mat->rmap->bs     = matin->rmap->bs;
2816   mat->assembled    = PETSC_TRUE;
2817   mat->insertmode   = NOT_SET_VALUES;
2818   mat->preallocated = PETSC_TRUE;
2819 
2820   a->size           = oldmat->size;
2821   a->rank           = oldmat->rank;
2822   a->donotstash     = oldmat->donotstash;
2823   a->roworiented    = oldmat->roworiented;
2824   a->rowindices     = 0;
2825   a->rowvalues      = 0;
2826   a->getrowactive   = PETSC_FALSE;
2827 
2828   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2829   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2830 
2831   if (oldmat->colmap) {
2832 #if defined (PETSC_USE_CTABLE)
2833     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2834 #else
2835     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2836     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2837     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2838 #endif
2839   } else a->colmap = 0;
2840   if (oldmat->garray) {
2841     PetscInt len;
2842     len  = oldmat->B->cmap->n;
2843     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2845     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2846   } else a->garray = 0;
2847 
2848   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2849   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2850   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2851   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2852   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2853   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2854   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2855   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2856   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2857   *newmat = mat;
2858   PetscFunctionReturn(0);
2859 }
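
/*
   Illustrative use of the routine above (not part of the source): MatDuplicate()
   dispatches here for MPIAIJ matrices and copies the layout, the diagonal (A)
   and off-diagonal (B) blocks, and the scatter context.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);         <- structure and values
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);  <- structure only
*/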
2860 
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatLoad_MPIAIJ"
2863 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2864 {
2865   Mat            A;
2866   PetscScalar    *vals,*svals;
2867   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2868   MPI_Status     status;
2869   PetscErrorCode ierr;
2870   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2871   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2872   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2873   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2874   PetscInt       cend,cstart,n,*rowners;
2875   int            fd;
2876 
2877   PetscFunctionBegin;
2878   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2879   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2880   if (!rank) {
2881     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2882     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2883     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2884   }
2885 
2886   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2887   M = header[1]; N = header[2];
2888   /* determine ownership of all rows */
2889   m    = M/size + ((M % size) > rank);
2890   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2891   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2892 
2893   /* First process needs enough room for process with most rows */
2894   if (!rank) {
2895     mmax       = rowners[1];
2896     for (i=2; i<=size; i++) {
2897       mmax = PetscMax(mmax,rowners[i]);
2898     }
2899   } else mmax = m;
2900 
2901   rowners[0] = 0;
2902   for (i=2; i<=size; i++) {
2903     rowners[i] += rowners[i-1];
2904   }
2905   rstart = rowners[rank];
2906   rend   = rowners[rank+1];
2907 
2908   /* distribute row lengths to all processors */
2909   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2910   if (!rank) {
2911     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2912     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2913     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2914     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2915     for (j=0; j<m; j++) {
2916       procsnz[0] += ourlens[j];
2917     }
2918     for (i=1; i<size; i++) {
2919       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2920       /* calculate the number of nonzeros on each processor */
2921       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2922         procsnz[i] += rowlengths[j];
2923       }
2924       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2925       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2926     }
2927     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2928   } else {
2929     mpicnt = PetscMPIIntCast(m);
2930     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2931   }
2932 
2933   if (!rank) {
2934     /* determine max buffer needed and allocate it */
2935     maxnz = 0;
2936     for (i=0; i<size; i++) {
2937       maxnz = PetscMax(maxnz,procsnz[i]);
2938     }
2939     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2940 
2941     /* read in my part of the matrix column indices  */
2942     nz   = procsnz[0];
2943     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2944     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2945 
2946     /* read in everyone else's portions and ship them off */
2947     for (i=1; i<size; i++) {
2948       nz     = procsnz[i];
2949       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2950       mpicnt = PetscMPIIntCast(nz);
2951       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(cols);CHKERRQ(ierr);
2954   } else {
2955     /* determine buffer space needed for message */
2956     nz = 0;
2957     for (i=0; i<m; i++) {
2958       nz += ourlens[i];
2959     }
2960     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2961 
2962     /* receive message of column indices */
2963     mpicnt = PetscMPIIntCast(nz);
2964     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2965     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2966     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2967     else if (mpimaxnz < 0) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2968     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2969   }
2970 
2971   /* determine column ownership if matrix is not square */
2972   if (N != M) {
2973     n      = N/size + ((N % size) > rank);
2974     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2975     cstart = cend - n;
2976   } else {
2977     cstart = rstart;
2978     cend   = rend;
2979     n      = cend - cstart;
2980   }
2981 
2982   /* loop over local rows, determining number of off diagonal entries */
2983   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2984   jj = 0;
2985   for (i=0; i<m; i++) {
2986     for (j=0; j<ourlens[i]; j++) {
2987       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2988       jj++;
2989     }
2990   }
2991 
2992   /* create our matrix */
2993   for (i=0; i<m; i++) {
2994     ourlens[i] -= offlens[i];
2995   }
2996   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2997   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2998   ierr = MatSetType(A,type);CHKERRQ(ierr);
2999   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3000 
3001   for (i=0; i<m; i++) {
3002     ourlens[i] += offlens[i];
3003   }
3004 
3005   if (!rank) {
3006     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3007 
3008     /* read in my part of the matrix numerical values  */
3009     nz   = procsnz[0];
3010     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3011 
3012     /* insert into matrix */
3013     jj      = rstart;
3014     smycols = mycols;
3015     svals   = vals;
3016     for (i=0; i<m; i++) {
3017       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3018       smycols += ourlens[i];
3019       svals   += ourlens[i];
3020       jj++;
3021     }
3022 
3023     /* read in other processors and ship out */
3024     for (i=1; i<size; i++) {
3025       nz     = procsnz[i];
3026       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3027       mpicnt = PetscMPIIntCast(nz);
3028       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3029     }
3030     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3031   } else {
3032     /* receive numeric values */
3033     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3034 
3035     /* receive message of values */
3036     mpicnt = PetscMPIIntCast(nz);
3037     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3038     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3039     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3040     else if (mpimaxnz < 0) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3041     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3042 
3043     /* insert into matrix */
3044     jj      = rstart;
3045     smycols = mycols;
3046     svals   = vals;
3047     for (i=0; i<m; i++) {
3048       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3049       smycols += ourlens[i];
3050       svals   += ourlens[i];
3051       jj++;
3052     }
3053   }
3054   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3055   ierr = PetscFree(vals);CHKERRQ(ierr);
3056   ierr = PetscFree(mycols);CHKERRQ(ierr);
3057   ierr = PetscFree(rowners);CHKERRQ(ierr);
3058 
3059   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061   *newmat = A;
3062   PetscFunctionReturn(0);
3063 }
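
/*
   Illustrative sketch (not part of the source; the file name is hypothetical):
   the loader above is reached through MatLoad() with a binary viewer.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   Rank 0 reads the header, row lengths, column indices, and values, and ships
   each process its slice, as the sends and receives above show.
*/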
3064 
3065 #undef __FUNCT__
3066 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3067 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3068 {
3069   PetscErrorCode ierr;
3070   IS             iscol_local;
3071   PetscInt       csize;
3072 
3073   PetscFunctionBegin;
3074   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3075   if (call == MAT_REUSE_MATRIX) {
3076     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3077     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3078   } else {
3079     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3080   }
3081   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3082   if (call == MAT_INITIAL_MATRIX) {
3083     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3084     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3085   }
3086   PetscFunctionReturn(0);
3087 }
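
/*
   Illustrative sketch (not part of the source; nlocal and first are assumed
   to describe this process's share of the index space): the public
   MatGetSubMatrix() lands here for MPIAIJ. The "ISAllGather" composition
   above is what makes the MAT_REUSE_MATRIX path possible.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(comm,nlocal,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(comm,nlocal,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
         ... A changes numerically, same nonzero pattern ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/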
3088 
3089 #undef __FUNCT__
3090 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3091 /*
3092     Not great since it makes two copies of the submatrix: first a SeqAIJ per
3093   process, then the end result by concatenating the local matrices. Writing
3094   it directly would be much like MatGetSubMatrices_MPIAIJ().
3095 
3096   Note: This requires a sequential iscol containing all the indices.
3097 */
3098 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3099 {
3100   PetscErrorCode ierr;
3101   PetscMPIInt    rank,size;
3102   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3103   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3104   Mat            *local,M,Mreuse;
3105   MatScalar      *vwork,*aa;
3106   MPI_Comm       comm = ((PetscObject)mat)->comm;
3107   Mat_SeqAIJ     *aij;
3108 
3109 
3110   PetscFunctionBegin;
3111   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3112   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3113 
3114   if (call ==  MAT_REUSE_MATRIX) {
3115     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3116     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3117     local = &Mreuse;
3118     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3119   } else {
3120     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3121     Mreuse = *local;
3122     ierr   = PetscFree(local);CHKERRQ(ierr);
3123   }
3124 
3125   /*
3126       m - number of local rows
3127       n - number of columns (same on all processors)
3128       rstart - first row in new global matrix generated
3129   */
3130   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3131   if (call == MAT_INITIAL_MATRIX) {
3132     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3133     ii  = aij->i;
3134     jj  = aij->j;
3135 
3136     /*
3137         Determine the number of non-zeros in the diagonal and off-diagonal
3138         portions of the matrix in order to do correct preallocation
3139     */
3140 
3141     /* first get start and end of "diagonal" columns */
3142     if (csize == PETSC_DECIDE) {
3143       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3144       if (mglobal == n) { /* square matrix */
3145 	nlocal = m;
3146       } else {
3147         nlocal = n/size + ((n % size) > rank);
3148       }
3149     } else {
3150       nlocal = csize;
3151     }
3152     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3153     rstart = rend - nlocal;
3154     if (rank == size - 1 && rend != n) {
3155       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3156     }
3157 
3158     /* next, compute all the lengths */
3159     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3160     olens = dlens + m;
3161     for (i=0; i<m; i++) {
3162       jend = ii[i+1] - ii[i];
3163       olen = 0;
3164       dlen = 0;
3165       for (j=0; j<jend; j++) {
3166         if (*jj < rstart || *jj >= rend) olen++;
3167         else dlen++;
3168         jj++;
3169       }
3170       olens[i] = olen;
3171       dlens[i] = dlen;
3172     }
3173     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3174     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3175     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3176     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3177     ierr = PetscFree(dlens);CHKERRQ(ierr);
3178   } else {
3179     PetscInt ml,nl;
3180 
3181     M = *newmat;
3182     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3183     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3184     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3185     /*
3186          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3187        rather than the slower MatSetValues().
3188     */
3189     M->was_assembled = PETSC_TRUE;
3190     M->assembled     = PETSC_FALSE;
3191   }
3192   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3193   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3194   ii  = aij->i;
3195   jj  = aij->j;
3196   aa  = aij->a;
3197   for (i=0; i<m; i++) {
3198     row   = rstart + i;
3199     nz    = ii[i+1] - ii[i];
3200     cwork = jj;     jj += nz;
3201     vwork = aa;     aa += nz;
3202     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3203   }
3204 
3205   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3206   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3207   *newmat = M;
3208 
3209   /* save submatrix used in processor for next request */
3210   if (call ==  MAT_INITIAL_MATRIX) {
3211     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3212     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3213   }
3214 
3215   PetscFunctionReturn(0);
3216 }
3217 
3218 EXTERN_C_BEGIN
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3221 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3222 {
3223   PetscInt       m,cstart, cend,j,nnz,i,d;
3224   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3225   const PetscInt *JJ;
3226   PetscScalar    *values;
3227   PetscErrorCode ierr;
3228 
3229   PetscFunctionBegin;
3230   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3231 
3232   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3233   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3234   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3235   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3236   m      = B->rmap->n;
3237   cstart = B->cmap->rstart;
3238   cend   = B->cmap->rend;
3239   rstart = B->rmap->rstart;
3240 
3241   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3242 
3243 #if defined(PETSC_USE_DEBUG)
3244   for (i=0; i<m; i++) {
3245     nnz     = Ii[i+1]- Ii[i];
3246     JJ      = J + Ii[i];
3247     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3248     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3249     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3250   }
3251 #endif
3252 
3253   for (i=0; i<m; i++) {
3254     nnz     = Ii[i+1]- Ii[i];
3255     JJ      = J + Ii[i];
3256     nnz_max = PetscMax(nnz_max,nnz);
3257     d       = 0;
3258     for (j=0; j<nnz; j++) {
3259       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3260     }
3261     d_nnz[i] = d;
3262     o_nnz[i] = nnz - d;
3263   }
3264   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3265   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3266 
3267   if (v) values = (PetscScalar*)v;
3268   else {
3269     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3270     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3271   }
3272 
3273   for (i=0; i<m; i++) {
3274     ii   = i + rstart;
3275     nnz  = Ii[i+1]- Ii[i];
3276     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3277   }
3278   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3279   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3280 
3281   if (!v) {
3282     ierr = PetscFree(values);CHKERRQ(ierr);
3283   }
3284   PetscFunctionReturn(0);
3285 }
3286 EXTERN_C_END
3287 
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3290 /*@
3291    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3292    (the default parallel PETSc format).
3293 
3294    Collective on MPI_Comm
3295 
3296    Input Parameters:
3297 +  B - the matrix
3298 .  i - the indices into j for the start of each local row (starts with zero)
3299 .  j - the column indices for each local row (starts with zero)
3300 -  v - optional values in the matrix
3301 
3302    Level: developer
3303 
3304    Notes:
3305        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3306      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3307      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3308 
3309        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3310 
3311        The format used for the sparse matrix input is equivalent to a
3312     row-major ordering, i.e., for the following matrix the expected input data
3313     is as shown:
3314 
3315         1 0 0
3316         2 0 3     P0
3317        -------
3318         4 5 6     P1
3319 
3320      Process0 [P0]: rows_owned=[0,1]
3321         i =  {0,1,3}  [size = nrow+1  = 2+1]
3322         j =  {0,0,2}  [size = nz = 3]
3323         v =  {1,2,3}  [size = nz = 3]
3324 
3325      Process1 [P1]: rows_owned=[2]
3326         i =  {0,3}    [size = nrow+1  = 1+1]
3327         j =  {0,1,2}  [size = nz = 3]
3328         v =  {4,5,6}  [size = nz = 3]
3329 
3330 .keywords: matrix, aij, compressed row, sparse, parallel
3331 
3332 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3333           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3334 @*/
3335 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3336 {
3337   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3338 
3339   PetscFunctionBegin;
3340   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3341   if (f) {
3342     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3343   }
3344   PetscFunctionReturn(0);
3345 }
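
/*
   A sketch of the call for the 3x3 example in the manual page above
   (illustrative; B is assumed to already have type MATMPIAIJ and its sizes
   set). On process 0, which owns rows 0 and 1:

     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);

   and on process 1, which owns row 2:

     PetscInt    i[] = {0,3};
     PetscInt    j[] = {0,1,2};
     PetscScalar v[] = {4.0,5.0,6.0};
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/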
3346 
3347 #undef __FUNCT__
3348 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3349 /*@C
3350    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3351    (the default parallel PETSc format).  For good matrix assembly performance
3352    the user should preallocate the matrix storage by setting the parameters
3353    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3354    performance can be increased by more than a factor of 50.
3355 
3356    Collective on MPI_Comm
3357 
3358    Input Parameters:
3359 +  A - the matrix
3360 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3361            (same value is used for all local rows)
3362 .  d_nnz - array containing the number of nonzeros in the various rows of the
3363            DIAGONAL portion of the local submatrix (possibly different for each row)
3364            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3365            The size of this array is equal to the number of local rows, i.e 'm'.
3366            You must leave room for the diagonal entry even if it is zero.
3367 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3368            submatrix (same value is used for all local rows).
3369 -  o_nnz - array containing the number of nonzeros in the various rows of the
3370            OFF-DIAGONAL portion of the local submatrix (possibly different for
3371            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3372            structure. The size of this array is equal to the number
3373            of local rows, i.e 'm'.
3374 
3375    If the *_nnz parameter is given, then the *_nz parameter is ignored.
3376 
3377    The AIJ format (also called the Yale sparse matrix format or
3378    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3379    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3380 
3381    The parallel matrix is partitioned such that the first m0 rows belong to
3382    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3383    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3384 
3385    The DIAGONAL portion of the local submatrix of a processor can be defined
3386    as the submatrix obtained by extracting the part corresponding
3387    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3388    first row that belongs to the processor, and r2 is the last row belonging
3389    to this processor. This is a square mxm matrix. The remaining portion
3390    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3391 
3392    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3393 
3394    You can call MatGetInfo() to get information on how effective the preallocation was;
3395    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3396    You can also run with the option -info and look for messages with the string
3397    malloc in them to see if additional memory allocation was needed.
3398 
3399    Example usage:
3400 
3401    Consider the following 8x8 matrix with 34 non-zero values, that is
3402    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3403    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3404    as follows:
3405 
3406 .vb
3407             1  2  0  |  0  3  0  |  0  4
3408     Proc0   0  5  6  |  7  0  0  |  8  0
3409             9  0 10  | 11  0  0  | 12  0
3410     -------------------------------------
3411            13  0 14  | 15 16 17  |  0  0
3412     Proc1   0 18  0  | 19 20 21  |  0  0
3413             0  0  0  | 22 23  0  | 24  0
3414     -------------------------------------
3415     Proc2  25 26 27  |  0  0 28  | 29  0
3416            30  0  0  | 31 32 33  |  0 34
3417 .ve
3418 
3419    This can be represented as a collection of submatrices as:
3420 
3421 .vb
3422       A B C
3423       D E F
3424       G H I
3425 .ve
3426 
3427    Where the submatrices A,B,C are owned by proc0, D,E,F are
3428    owned by proc1, G,H,I are owned by proc2.
3429 
3430    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3431    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3432    The 'M','N' parameters are 8,8, and have the same values on all procs.
3433 
3434    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3435    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3436    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3437    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3438    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3439    matrix, and [DF] as another SeqAIJ matrix.
3440 
3441    When d_nz, o_nz parameters are specified, d_nz storage elements are
3442    allocated for every row of the local diagonal submatrix, and o_nz
3443    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3444    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3445    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3446    In this case, the values of d_nz,o_nz are:
3447 .vb
3448      proc0 : dnz = 2, o_nz = 2
3449      proc1 : dnz = 3, o_nz = 2
3450      proc2 : dnz = 1, o_nz = 4
3451 .ve
3452    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3453    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3454    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3455    34 values.
3456 
3457    When d_nnz, o_nnz parameters are specified, the storage is specified
3458    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3459    In the above case the values for d_nnz,o_nnz are:
3460 .vb
3461      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3462      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3463      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3464 .ve
3465    Here the space allocated is the sum of all the above values, i.e., 34, and
3466    hence the preallocation is perfect.
3467 
3468    Level: intermediate
3469 
3470 .keywords: matrix, aij, compressed row, sparse, parallel
3471 
3472 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3473           MPIAIJ, MatGetInfo()
3474 @*/
3475 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3476 {
3477   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3478 
3479   PetscFunctionBegin;
3480   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3481   if (f) {
3482     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3483   }
3484   PetscFunctionReturn(0);
3485 }
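
/*
   A sketch matching the 8x8 example in the manual page above (illustrative;
   A is assumed to be an MPIAIJ matrix with 3 local rows on proc0). Per-row
   preallocation on proc0 would be

     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   or, with a single bound applied to every local row,

     ierr = MatMPIAIJSetPreallocation(A,2,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/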
3486 
3487 #undef __FUNCT__
3488 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3489 /*@
3490      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3491          CSR format the local rows.
3492 
3493    Collective on MPI_Comm
3494 
3495    Input Parameters:
3496 +  comm - MPI communicator
3497 .  m - number of local rows (Cannot be PETSC_DECIDE)
3498 .  n - This value should be the same as the local size used in creating the
3499        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3500        calculated if N is given) For square matrices n is almost always m.
3501 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3502 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3503 .   i - row indices
3504 .   j - column indices
3505 -   a - matrix values
3506 
3507    Output Parameter:
3508 .   mat - the matrix
3509 
3510    Level: intermediate
3511 
3512    Notes:
3513        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3514      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3515      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3516 
3517        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3518 
3519        The format which is used for the sparse matrix input, is equivalent to a
3520     row-major ordering.. i.e for the following matrix, the input data expected is
3521     as shown:
3522 
3523         1 0 0
3524         2 0 3     P0
3525        -------
3526         4 5 6     P1
3527 
3528      Process0 [P0]: rows_owned=[0,1]
3529         i =  {0,1,3}  [size = nrow+1  = 2+1]
3530         j =  {0,0,2}  [size = nz = 3]
3531         v =  {1,2,3}  [size = nz = 3]
3532 
3533      Process1 [P1]: rows_owned=[2]
3534         i =  {0,3}    [size = nrow+1  = 1+1]
3535         j =  {0,1,2}  [size = nz = 3]
3536         v =  {4,5,6}  [size = nz = 3]
3537 
3538 .keywords: matrix, aij, compressed row, sparse, parallel
3539 
3540 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3541           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3542 @*/
3543 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3544 {
3545   PetscErrorCode ierr;
3546 
3547   PetscFunctionBegin;
3548   if (i[0]) {
3549     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3550   }
3551   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3552   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3553   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3554   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3555   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3556   PetscFunctionReturn(0);
3557 }
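
/*
   A sketch of calling the routine above with the two-process example from its
   manual page (illustrative; comm is assumed). On process 0, which owns the
   first two rows of the 3x3 matrix:

     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);

   Process 1 makes the matching call with m = 1, i = {0,3}, j = {0,1,2},
   a = {4,5,6}.
*/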
3558 
3559 #undef __FUNCT__
3560 #define __FUNCT__ "MatCreateMPIAIJ"
3561 /*@C
3562    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3563    (the default parallel PETSc format).  For good matrix assembly performance
3564    the user should preallocate the matrix storage by setting the parameters
3565    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3566    performance can be increased by more than a factor of 50.
3567 
3568    Collective on MPI_Comm
3569 
3570    Input Parameters:
3571 +  comm - MPI communicator
3572 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3573            This value should be the same as the local size used in creating the
3574            y vector for the matrix-vector product y = Ax.
3575 .  n - This value should be the same as the local size used in creating the
3576        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3577        calculated if N is given). For square matrices n is almost always m.
3578 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3579 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3580 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3581            (same value is used for all local rows)
3582 .  d_nnz - array containing the number of nonzeros in the various rows of the
3583            DIAGONAL portion of the local submatrix (possibly different for each row)
3584            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3585            The size of this array is equal to the number of local rows, i.e 'm'.
3586            You must leave room for the diagonal entry even if it is zero.
3587 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3588            submatrix (same value is used for all local rows).
3589 -  o_nnz - array containing the number of nonzeros in the various rows of the
3590            OFF-DIAGONAL portion of the local submatrix (possibly different for
3591            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3592            structure. The size of this array is equal to the number
3593            of local rows, i.e 'm'.
3594 
3595    Output Parameter:
3596 .  A - the matrix
3597 
3598    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3599    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3600    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3601 
3602    Notes:
3603    If the *_nnz parameter is given, then the *_nz parameter is ignored.
3604 
3605    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3606    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3607    storage requirements for this matrix.
3608 
3609    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3610    processor than it must be used on all processors that share the object for
3611    that argument.
3612 
3613    The user MUST specify either the local or global matrix dimensions
3614    (possibly both).
3615 
3616    The parallel matrix is partitioned across processors such that the
3617    first m0 rows belong to process 0, the next m1 rows belong to
3618    process 1, the next m2 rows belong to process 2 etc.. where
3619    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
3620    values corresponding to an [m x N] submatrix.
3621 
3622    The columns are logically partitioned with the n0 columns belonging
3623    to 0th partition, the next n1 columns belonging to the next
3624    partition etc., where n0,n1,n2,... are the input parameter 'n'.
3625 
3626    The DIAGONAL portion of the local submatrix on any given processor
3627    is the submatrix corresponding to the rows and columns m,n
3628    corresponding to the given processor. i.e diagonal matrix on
3629    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3630    etc. The remaining portion of the local submatrix [m x (N-n)]
3631    constitute the OFF-DIAGONAL portion. The example below better
3632    illustrates this concept.
3633 
3634    For a square global matrix we define each processor's diagonal portion
3635    to be its local rows and the corresponding columns (a square submatrix);
3636    each processor's off-diagonal portion encompasses the remainder of the
3637    local matrix (a rectangular submatrix).
3638 
3639    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3640 
3641    When calling this routine with a single process communicator, a matrix of
3642    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3643    type of communicator, use the construction mechanism:
3644      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3645 
3646    By default, this format uses inodes (identical nodes) when possible.
3647    We search for consecutive rows with the same nonzero structure, thereby
3648    reusing matrix information to achieve increased efficiency.
3649 
3650    Options Database Keys:
3651 +  -mat_no_inode  - Do not use inodes
3652 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3653 -  -mat_aij_oneindex - Internally use indexing starting at 1
3654         rather than 0.  Note that when calling MatSetValues(),
3655         the user still MUST index entries starting at 0!
3656 
3657 
3658    Example usage:
3659 
3660    Consider the following 8x8 matrix with 34 non-zero values, that is
3661    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3662    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3663    as follows:
3664 
3665 .vb
3666             1  2  0  |  0  3  0  |  0  4
3667     Proc0   0  5  6  |  7  0  0  |  8  0
3668             9  0 10  | 11  0  0  | 12  0
3669     -------------------------------------
3670            13  0 14  | 15 16 17  |  0  0
3671     Proc1   0 18  0  | 19 20 21  |  0  0
3672             0  0  0  | 22 23  0  | 24  0
3673     -------------------------------------
3674     Proc2  25 26 27  |  0  0 28  | 29  0
3675            30  0  0  | 31 32 33  |  0 34
3676 .ve
3677 
3678    This can be represented as a collection of submatrices as:
3679 
3680 .vb
3681       A B C
3682       D E F
3683       G H I
3684 .ve
3685 
3686    Where the submatrices A,B,C are owned by proc0, D,E,F are
3687    owned by proc1, G,H,I are owned by proc2.
3688 
3689    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3690    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3691    The 'M','N' parameters are 8,8, and have the same values on all procs.
3692 
3693    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3694    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3695    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3696    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3697    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3698    matrix, and [DF] as another SeqAIJ matrix.
3699 
3700    When d_nz, o_nz parameters are specified, d_nz storage elements are
3701    allocated for every row of the local diagonal submatrix, and o_nz
3702    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3703    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3704    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3705    In this case, the values of d_nz,o_nz are:
3706 .vb
3707      proc0 : dnz = 2, o_nz = 2
3708      proc1 : dnz = 3, o_nz = 2
3709      proc2 : dnz = 1, o_nz = 4
3710 .ve
3711    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3712    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3713    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3714    34 values.
3715 
3716    When d_nnz, o_nnz parameters are specified, the storage is specified
3717    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3718    In the above case the values for d_nnz,o_nnz are:
3719 .vb
3720      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3721      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3722      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3723 .ve
3724    Here the space allocated is the sum of all the above values, i.e., 34, and
3725    hence the preallocation is perfect.
3726 
3727    Level: intermediate
3728 
3729 .keywords: matrix, aij, compressed row, sparse, parallel
3730 
3731 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3732           MPIAIJ, MatCreateMPIAIJWithArrays()
3733 @*/
3734 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3735 {
3736   PetscErrorCode ierr;
3737   PetscMPIInt    size;
3738 
3739   PetscFunctionBegin;
3740   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3741   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3742   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3743   if (size > 1) {
3744     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3745     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3746   } else {
3747     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3748     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3749   }
3750   PetscFunctionReturn(0);
3751 }
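
/*
   The recommended construction paradigm mentioned in the manual page above
   (illustrative sketch; m,n,M,N and the preallocation arguments are assumed):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);

   Unlike MatCreateMPIAIJ(), this keeps the type changeable at runtime and
   always yields an MPIAIJ matrix, even on a one-process communicator.
*/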
3752 
3753 #undef __FUNCT__
3754 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3755 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3756 {
3757   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3758 
3759   PetscFunctionBegin;
3760   *Ad     = a->A;
3761   *Ao     = a->B;
3762   *colmap = a->garray;
3763   PetscFunctionReturn(0);
3764 }
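
/*
   Illustrative use of the accessor above: Ad is the square diagonal block,
   Ao the off-diagonal block, and colmap[k] is the global column index of
   local column k of Ao (a->garray). The returned matrices are the internal
   blocks of A, so they must not be destroyed by the caller.

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/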
3765 
3766 #undef __FUNCT__
3767 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3768 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3769 {
3770   PetscErrorCode ierr;
3771   PetscInt       i;
3772   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3773 
3774   PetscFunctionBegin;
3775   if (coloring->ctype == IS_COLORING_GLOBAL) {
3776     ISColoringValue *allcolors,*colors;
3777     ISColoring      ocoloring;
3778 
3779     /* set coloring for diagonal portion */
3780     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3781 
3782     /* set coloring for off-diagonal portion */
3783     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3784     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3785     for (i=0; i<a->B->cmap->n; i++) {
3786       colors[i] = allcolors[a->garray[i]];
3787     }
3788     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3789     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3790     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3791     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3792   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3793     ISColoringValue *colors;
3794     PetscInt        *larray;
3795     ISColoring      ocoloring;
3796 
3797     /* set coloring for diagonal portion */
3798     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3799     for (i=0; i<a->A->cmap->n; i++) {
3800       larray[i] = i + A->cmap->rstart;
3801     }
3802     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3803     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3804     for (i=0; i<a->A->cmap->n; i++) {
3805       colors[i] = coloring->colors[larray[i]];
3806     }
3807     ierr = PetscFree(larray);CHKERRQ(ierr);
3808     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3809     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3810     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3811 
3812     /* set coloring for off-diagonal portion */
3813     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3814     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3815     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3816     for (i=0; i<a->B->cmap->n; i++) {
3817       colors[i] = coloring->colors[larray[i]];
3818     }
3819     ierr = PetscFree(larray);CHKERRQ(ierr);
3820     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3821     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3822     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3823   } else {
3824     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3825   }
3826 
3827   PetscFunctionReturn(0);
3828 }
3829 
3830 #if defined(PETSC_HAVE_ADIC)
3831 #undef __FUNCT__
3832 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3833 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3834 {
3835   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3836   PetscErrorCode ierr;
3837 
3838   PetscFunctionBegin;
3839   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3840   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3841   PetscFunctionReturn(0);
3842 }
3843 #endif
3844 
3845 #undef __FUNCT__
3846 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3847 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3848 {
3849   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3850   PetscErrorCode ierr;
3851 
3852   PetscFunctionBegin;
3853   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3854   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3855   PetscFunctionReturn(0);
3856 }
3857 
3858 #undef __FUNCT__
3859 #define __FUNCT__ "MatMerge"
3860 /*@
3861       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3862                  matrices from each processor
3863 
3864     Collective on MPI_Comm
3865 
3866    Input Parameters:
3867 +    comm - the communicator the parallel matrix will live on
3868 .    inmat - the input sequential matrix (one per process)
3869 .    n - number of local columns (or PETSC_DECIDE)
3870 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3871 
3872    Output Parameter:
3873 .    outmat - the parallel matrix generated
3874 
3875     Level: advanced
3876 
3877    Notes: The number of columns of the matrix on EACH processor MUST be the same. The input inmat is destroyed by this routine.
3878 
3879 @*/
3880 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3881 {
3882   PetscErrorCode ierr;
3883   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3884   PetscInt       *indx;
3885   PetscScalar    *values;
3886 
3887   PetscFunctionBegin;
3888   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3889   if (scall == MAT_INITIAL_MATRIX){
3890     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3891     if (n == PETSC_DECIDE){
3892       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3893     }
3894     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3895     rstart -= m;
3896 
3897     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3898     for (i=0;i<m;i++) {
3899       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3900       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3901       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3902     }
3903     /* This routine will ONLY return MPIAIJ type matrix */
3904     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3905     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3906     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3907     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3908     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3909 
3910   } else if (scall == MAT_REUSE_MATRIX){
3911     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3912   } else {
3913     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3914   }
3915 
3916   for (i=0;i<m;i++) {
3917     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3918     Ii    = i + rstart;
3919     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3920     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3921   }
3922   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3923   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3924   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3925 
3926   PetscFunctionReturn(0);
3927 }
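
/*
   Illustrative sketch for MatMerge() (seq is assumed to be a SeqAIJ matrix,
   one per process, all with the same number of columns). The local rows are
   stacked in rank order; note that the routine destroys the input matrix.

     Mat par;
     ierr = MatMerge(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
*/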
3928 
3929 #undef __FUNCT__
3930 #define __FUNCT__ "MatFileSplit"
3931 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3932 {
3933   PetscErrorCode    ierr;
3934   PetscMPIInt       rank;
3935   PetscInt          m,N,i,rstart,nnz;
3936   size_t            len;
3937   const PetscInt    *indx;
3938   PetscViewer       out;
3939   char              *name;
3940   Mat               B;
3941   const PetscScalar *values;
3942 
3943   PetscFunctionBegin;
3944   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3945   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3946   /* Should this be the type of the diagonal block of A? */
3947   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3948   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3949   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3950   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3951   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3952   for (i=0;i<m;i++) {
3953     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3954     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3955     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3956   }
3957   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3958   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3959 
3960   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3961   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3962   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3963   sprintf(name,"%s.%d",outfile,rank);
3964   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3965   ierr = PetscFree(name);CHKERRQ(ierr);
3966   ierr = MatView(B,out);CHKERRQ(ierr);
3967   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3968   ierr = MatDestroy(B);CHKERRQ(ierr);
3969   PetscFunctionReturn(0);
3970 }
3971 
3972 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3973 #undef __FUNCT__
3974 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3975 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3976 {
3977   PetscErrorCode       ierr;
3978   Mat_Merge_SeqsToMPI  *merge;
3979   PetscContainer       container;
3980 
3981   PetscFunctionBegin;
3982   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3983   if (container) {
3984     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3985     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3986     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3987     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3988     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3989     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3997     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
3998 
3999     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4000     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4001   }
4002   ierr = PetscFree(merge);CHKERRQ(ierr);
4003 
4004   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4005   PetscFunctionReturn(0);
4006 }
4007 
4008 #include "../src/mat/utils/freespace.h"
4009 #include "petscbt.h"
4010 
4011 #undef __FUNCT__
4012 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4013 /*@C
4014       MatMerge_SeqsToMPINumeric - Fills an MPIAIJ matrix, whose nonzero structure
4015                  was created by MatMerge_SeqsToMPISymbolic(), with the sum of the
4016                  numerical values of the sequential matrices from each process
4017 
4018     Collective on Mat
4019 
4020    Input Parameters:
4021 +    seqmat - the input sequential matrix
4022 -    mpimat - the parallel matrix previously created by MatMerge_SeqsToMPISymbolic()
4023 
4024     Level: advanced
4025 
4026    Notes:
4027      The dimensions of the sequential matrix MUST be the same on every process.
4028      The supporting merge data is attached to mpimat in a container named
4029      "MatMergeSeqsToMPI" and is freed when mpimat is destroyed; call
4030      PetscObjectQuery() to access it.
4031 @*/
4036 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4037 {
4038   PetscErrorCode       ierr;
4039   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4040   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4041   PetscMPIInt          size,rank,taga,*len_s;
4042   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4043   PetscInt             proc,m;
4044   PetscInt             **buf_ri,**buf_rj;
4045   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4046   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4047   MPI_Request          *s_waits,*r_waits;
4048   MPI_Status           *status;
4049   MatScalar            *aa=a->a;
4050   MatScalar            **abuf_r,*ba_i;
4051   Mat_Merge_SeqsToMPI  *merge;
4052   PetscContainer       container;
4053 
4054   PetscFunctionBegin;
4055   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4056 
4057   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4058   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4059 
4060   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4061   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat is missing the attached MatMergeSeqsToMPI container");
4062   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4064   bi     = merge->bi;
4065   bj     = merge->bj;
4066   buf_ri = merge->buf_ri;
4067   buf_rj = merge->buf_rj;
4068 
4069   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4070   owners = merge->rowmap->range;
4071   len_s  = merge->len_s;
4072 
4073   /* send and recv matrix values */
4074   /*-----------------------------*/
4075   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4076   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4077 
4078   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4079   for (proc=0,k=0; proc<size; proc++){
4080     if (!len_s[proc]) continue;
4081     i = owners[proc];
4082     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4083     k++;
4084   }
4085 
4086   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4087   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4088   ierr = PetscFree(status);CHKERRQ(ierr);
4089 
4090   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4091   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4092 
4093   /* insert mat values of mpimat */
4094   /*----------------------------*/
4095   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4096   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4097 
4098   for (k=0; k<merge->nrecv; k++){
4099     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4100     nrows = *(buf_ri_k[k]);
4101     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4102     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4103   }
4104 
4105   /* set values of ba */
4106   m = merge->rowmap->n;
4107   for (i=0; i<m; i++) {
4108     arow = owners[rank] + i;
4109     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4110     bnzi = bi[i+1] - bi[i];
4111     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4112 
4113     /* add local non-zero vals of this proc's seqmat into ba */
4114     anzi = ai[arow+1] - ai[arow];
4115     aj   = a->j + ai[arow];
4116     aa   = a->a + ai[arow];
4117     nextaj = 0;
4118     for (j=0; nextaj<anzi; j++){
4119       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4120         ba_i[j] += aa[nextaj++];
4121       }
4122     }
4123 
4124     /* add received vals into ba */
4125     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4126       /* i-th row */
4127       if (i == *nextrow[k]) {
4128         anzi = *(nextai[k]+1) - *nextai[k];
4129         aj   = buf_rj[k] + *(nextai[k]);
4130         aa   = abuf_r[k] + *(nextai[k]);
4131         nextaj = 0;
4132         for (j=0; nextaj<anzi; j++){
4133           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4134             ba_i[j] += aa[nextaj++];
4135           }
4136         }
4137         nextrow[k]++; nextai[k]++;
4138       }
4139     }
4140     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4141   }
4142   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4143   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4144 
4145   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4146   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4147   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4148   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4149   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4150   PetscFunctionReturn(0);
4151 }
4152 
4153 #undef __FUNCT__
4154 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4155 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4156 {
4157   PetscErrorCode       ierr;
4158   Mat                  B_mpi;
4159   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4160   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4161   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4162   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4163   PetscInt             len,proc,*dnz,*onz;
4164   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4165   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4166   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4167   MPI_Status           *status;
4168   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4169   PetscBT              lnkbt;
4170   Mat_Merge_SeqsToMPI  *merge;
4171   PetscContainer       container;
4172 
4173   PetscFunctionBegin;
4174   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4175 
4176   /* make sure it is a PETSc comm */
4177   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4180 
4181   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4182   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4183 
4184   /* determine row ownership */
4185   /*---------------------------------------------------------*/
4186   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4187   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4188   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4189   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4190   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4191   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4192   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4193 
4194   m      = merge->rowmap->n;
4195   M      = merge->rowmap->N;
4196   owners = merge->rowmap->range;
4197 
4198   /* determine the number of messages to send, their lengths */
4199   /*---------------------------------------------------------*/
4200   len_s  = merge->len_s;
4201 
4202   len = 0;  /* length of buf_si[] */
4203   merge->nsend = 0;
4204   for (proc=0; proc<size; proc++){
4205     len_si[proc] = 0;
4206     if (proc == rank){
4207       len_s[proc] = 0;
4208     } else {
4209       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4210       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4211     }
4212     if (len_s[proc]) {
4213       merge->nsend++;
4214       nrows = 0;
4215       for (i=owners[proc]; i<owners[proc+1]; i++){
4216         if (ai[i+1] > ai[i]) nrows++;
4217       }
4218       len_si[proc] = 2*(nrows+1);
4219       len += len_si[proc];
4220     }
4221   }
4222 
4223   /* determine the number and length of messages to receive for ij-structure */
4224   /*-------------------------------------------------------------------------*/
4225   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4226   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4227 
4228   /* post the Irecv of j-structure */
4229   /*-------------------------------*/
4230   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4231   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4232 
4233   /* post the Isend of j-structure */
4234   /*--------------------------------*/
4235   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4236 
4237   for (proc=0, k=0; proc<size; proc++){
4238     if (!len_s[proc]) continue;
4239     i = owners[proc];
4240     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4241     k++;
4242   }
4243 
4244   /* receives and sends of j-structure are complete */
4245   /*------------------------------------------------*/
4246   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4247   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4248 
4249   /* send and recv i-structure */
4250   /*---------------------------*/
4251   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4252   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4253 
4254   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4255   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4256   for (proc=0,k=0; proc<size; proc++){
4257     if (!len_s[proc]) continue;
4258     /* form outgoing message for i-structure:
4259          buf_si[0]:                 nrows to be sent
4260                [1:nrows]:           row index (global)
4261                [nrows+1:2*nrows+1]: i-structure index
4262     */
4263     /*-------------------------------------------*/
4264     nrows = len_si[proc]/2 - 1;
4265     buf_si_i    = buf_si + nrows+1;
4266     buf_si[0]   = nrows;
4267     buf_si_i[0] = 0;
4268     nrows = 0;
4269     for (i=owners[proc]; i<owners[proc+1]; i++){
4270       anzi = ai[i+1] - ai[i];
4271       if (anzi) {
4272         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4273         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4274         nrows++;
4275       }
4276     }
4277     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4278     k++;
4279     buf_si += len_si[proc];
4280   }
4281 
4282   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4283   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4284 
4285   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4286   for (i=0; i<merge->nrecv; i++){
4287     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4288   }
4289 
4290   ierr = PetscFree(len_si);CHKERRQ(ierr);
4291   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4292   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4293   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4294   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4295   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4296   ierr = PetscFree(status);CHKERRQ(ierr);
4297 
4298   /* compute a local seq matrix in each processor */
4299   /*----------------------------------------------*/
4300   /* allocate bi array and free space for accumulating nonzero column info */
4301   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4302   bi[0] = 0;
4303 
4304   /* create and initialize a linked list */
4305   nlnk = N+1;
4306   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4307 
4308   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4310   len  = ai[owners[rank+1]] - ai[owners[rank]];
4311   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4312   current_space = free_space;
4313 
4314   /* determine symbolic info for each local row */
4315   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4316 
4317   for (k=0; k<merge->nrecv; k++){
4318     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4319     nrows = *buf_ri_k[k];
4320     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4321     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4322   }
4323 
4324   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4325   len = 0;
4326   for (i=0;i<m;i++) {
4327     bnzi   = 0;
4328     /* add local non-zero cols of this proc's seqmat into lnk */
4329     arow   = owners[rank] + i;
4330     anzi   = ai[arow+1] - ai[arow];
4331     aj     = a->j + ai[arow];
4332     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4333     bnzi += nlnk;
4334     /* add received col data into lnk */
4335     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4336       if (i == *nextrow[k]) { /* i-th row */
4337         anzi = *(nextai[k]+1) - *nextai[k];
4338         aj   = buf_rj[k] + *nextai[k];
4339         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4340         bnzi += nlnk;
4341         nextrow[k]++; nextai[k]++;
4342       }
4343     }
4344     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4345 
4346     /* if free space is not available, make more free space */
4347     if (current_space->local_remaining<bnzi) {
4348       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4349       nspacedouble++;
4350     }
4351     /* copy data into free space, then initialize lnk */
4352     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4353     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4354 
4355     current_space->array           += bnzi;
4356     current_space->local_used      += bnzi;
4357     current_space->local_remaining -= bnzi;
4358 
4359     bi[i+1] = bi[i] + bnzi;
4360   }
4361 
4362   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4363 
4364   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4365   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4366   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4367 
4368   /* create symbolic parallel matrix B_mpi */
4369   /*---------------------------------------*/
4370   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4371   if (n==PETSC_DECIDE) {
4372     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4373   } else {
4374     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4375   }
4376   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4377   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4378   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4379 
4380   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4381   B_mpi->assembled     = PETSC_FALSE;
4382   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4383   merge->bi            = bi;
4384   merge->bj            = bj;
4385   merge->buf_ri        = buf_ri;
4386   merge->buf_rj        = buf_rj;
4387   merge->coi           = PETSC_NULL;
4388   merge->coj           = PETSC_NULL;
4389   merge->owners_co     = PETSC_NULL;
4390 
4391   /* attach the supporting struct to B_mpi for reuse */
4392   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4393   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4394   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4395   *mpimat = B_mpi;
4396 
4397   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4398   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4399   PetscFunctionReturn(0);
4400 }
4401 
4402 #undef __FUNCT__
4403 #define __FUNCT__ "MatMerge_SeqsToMPI"
4404 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4405 {
4406   PetscErrorCode   ierr;
4407 
4408   PetscFunctionBegin;
4409   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4410   if (scall == MAT_INITIAL_MATRIX){
4411     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4412   }
4413   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4414   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4415   PetscFunctionReturn(0);
4416 }
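
/*
   Hedged usage sketch for MatMerge_SeqsToMPI() (illustrative names; not library
   source): every process assembles a SeqAIJ matrix with the SAME global
   dimensions, and the merge sums all of them into one MPIAIJ matrix. A second
   call with MAT_REUSE_MATRIX skips the symbolic phase and only re-sums values.

      Mat seqmat,mpimat;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,5,PETSC_NULL,&seqmat);CHKERRQ(ierr);
      ... set this process's contributions with MatSetValues(seqmat,...) ...
      ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... update the values of seqmat, keeping the same nonzero pattern ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(seqmat);CHKERRQ(ierr);
*/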
4417 
4418 #undef __FUNCT__
4419 #define __FUNCT__ "MatGetLocalMat"
4420 /*@
4421      MatGetLocalMat - Creates a SeqAIJ matrix from the local rows of an MPIAIJ matrix
4422 
4423     Not Collective
4424 
4425    Input Parameters:
4426 +    A - the matrix
4427 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4428 
4429    Output Parameter:
4430 .    A_loc - the local sequential matrix generated
4431 
4432     Level: developer
4433 
4434 @*/
4435 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4436 {
4437   PetscErrorCode  ierr;
4438   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4439   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4440   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4441   MatScalar       *aa=a->a,*ba=b->a,*cam;
4442   PetscScalar     *ca;
4443   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4444   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4445 
4446   PetscFunctionBegin;
4447   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4448   if (scall == MAT_INITIAL_MATRIX){
4449     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4450     ci[0] = 0;
4451     for (i=0; i<am; i++){
4452       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4453     }
4454     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4455     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4456     k = 0;
4457     for (i=0; i<am; i++) {
4458       ncols_o = bi[i+1] - bi[i];
4459       ncols_d = ai[i+1] - ai[i];
4460       /* off-diagonal portion of A */
4461       for (jo=0; jo<ncols_o; jo++) {
4462         col = cmap[*bj];
4463         if (col >= cstart) break;
4464         cj[k]   = col; bj++;
4465         ca[k++] = *ba++;
4466       }
4467       /* diagonal portion of A */
4468       for (j=0; j<ncols_d; j++) {
4469         cj[k]   = cstart + *aj++;
4470         ca[k++] = *aa++;
4471       }
4472       /* off-diagonal portion of A */
4473       for (j=jo; j<ncols_o; j++) {
4474         cj[k]   = cmap[*bj++];
4475         ca[k++] = *ba++;
4476       }
4477     }
4478     /* put together the new matrix */
4479     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4480     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4481     /* Since these are PETSc arrays, change flags to free them as necessary. */
4482     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4483     mat->free_a  = PETSC_TRUE;
4484     mat->free_ij = PETSC_TRUE;
4485     mat->nonew   = 0;
4486   } else if (scall == MAT_REUSE_MATRIX){
4487     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4488     ci = mat->i; cj = mat->j; cam = mat->a;
4489     for (i=0; i<am; i++) {
4490       /* off-diagonal portion of A */
4491       ncols_o = bi[i+1] - bi[i];
4492       for (jo=0; jo<ncols_o; jo++) {
4493         col = cmap[*bj];
4494         if (col >= cstart) break;
4495         *cam++ = *ba++; bj++;
4496       }
4497       /* diagonal portion of A */
4498       ncols_d = ai[i+1] - ai[i];
4499       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4500       /* off-diagonal portion of A */
4501       for (j=jo; j<ncols_o; j++) {
4502         *cam++ = *ba++; bj++;
4503       }
4504     }
4505   } else {
4506     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4507   }
4508 
4509   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
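
/*
   Hedged usage sketch for MatGetLocalMat() (names illustrative): extract the
   local rows of an MPIAIJ matrix A once, then refresh the values in place
   after A changes numerically but keeps its nonzero pattern.

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... the numerical values of A change ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/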
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "MatGetLocalMatCondensed"
4515 /*@C
4516      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4517 
4518     Not Collective
4519 
4520    Input Parameters:
4521 +    A - the matrix
4522 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4523 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4524 
4525    Output Parameter:
4526 .    A_loc - the local sequential matrix generated
4527 
4528     Level: developer
4529 
4530 @*/
4531 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4532 {
4533   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4534   PetscErrorCode    ierr;
4535   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4536   IS                isrowa,iscola;
4537   Mat               *aloc;
4538 
4539   PetscFunctionBegin;
4540   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4541   if (!row){
4542     start = A->rmap->rstart; end = A->rmap->rend;
4543     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4544   } else {
4545     isrowa = *row;
4546   }
4547   if (!col){
4548     start = A->cmap->rstart;
4549     cmap  = a->garray;
4550     nzA   = a->A->cmap->n;
4551     nzB   = a->B->cmap->n;
4552     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4553     ncols = 0;
4554     for (i=0; i<nzB; i++) {
4555       if (cmap[i] < start) idx[ncols++] = cmap[i];
4556       else break;
4557     }
4558     imark = i;
4559     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4560     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4561     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4562     ierr = PetscFree(idx);CHKERRQ(ierr);
4563   } else {
4564     iscola = *col;
4565   }
4566   if (scall != MAT_INITIAL_MATRIX){
4567     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4568     aloc[0] = *A_loc;
4569   }
4570   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4571   *A_loc = aloc[0];
4572   ierr = PetscFree(aloc);CHKERRQ(ierr);
4573   if (!row){
4574     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4575   }
4576   if (!col){
4577     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4578   }
4579   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4580   PetscFunctionReturn(0);
4581 }
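
/*
   Hedged usage sketch for MatGetLocalMatCondensed() (names illustrative):
   passing PETSC_NULL for row and col lets the routine build the index sets
   itself, keeping the local rows and only the columns where the local part of
   A is nonzero.

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/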
4582 
4583 #undef __FUNCT__
4584 #define __FUNCT__ "MatGetBrowsOfAcols"
4585 /*@C
4586     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4587 
4588     Collective on Mat
4589 
4590    Input Parameters:
4591 +    A,B - the matrices in mpiaij format
4592 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4593 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4594 
4595    Output Parameter:
4596 +    rowb, colb - index sets of rows and columns of B to extract
4597 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq come from B's local rows)
4598 -    B_seq - the sequential matrix generated
4599 
4600     Level: developer
4601 
4602 @*/
4603 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4604 {
4605   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4606   PetscErrorCode    ierr;
4607   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4608   IS                isrowb,iscolb;
4609   Mat               *bseq;
4610 
4611   PetscFunctionBegin;
4612   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4613     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4614   }
4615   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4616 
4617   if (scall == MAT_INITIAL_MATRIX){
4618     start = A->cmap->rstart;
4619     cmap  = a->garray;
4620     nzA   = a->A->cmap->n;
4621     nzB   = a->B->cmap->n;
4622     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4623     ncols = 0;
4624     for (i=0; i<nzB; i++) {  /* row < local row index */
4625       if (cmap[i] < start) idx[ncols++] = cmap[i];
4626       else break;
4627     }
4628     imark = i;
4629     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4630     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4631     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4632     ierr = PetscFree(idx);CHKERRQ(ierr);
4633     *brstart = imark;
4634     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4635   } else {
4636     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4637     isrowb = *rowb; iscolb = *colb;
4638     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4639     bseq[0] = *B_seq;
4640   }
4641   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4642   *B_seq = bseq[0];
4643   ierr = PetscFree(bseq);CHKERRQ(ierr);
4644   if (!rowb){
4645     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4646   } else {
4647     *rowb = isrowb;
4648   }
4649   if (!colb){
4650     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4651   } else {
4652     *colb = iscolb;
4653   }
4654   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4655   PetscFunctionReturn(0);
4656 }
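
/*
   Hedged usage sketch for MatGetBrowsOfAcols() (names illustrative): the index
   sets returned by the MAT_INITIAL_MATRIX call are handed back for the
   MAT_REUSE_MATRIX call, so only B's values need to be re-extracted.

      Mat      B_seq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ... the numerical values of B change, pattern unchanged ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/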
4657 
4658 #undef __FUNCT__
4659 #define __FUNCT__ "MatGetBrowsOfAoCols"
4660 /*@C
4661     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4662     of the OFF-DIAGONAL portion of the local part of A
4663 
4664     Collective on Mat
4665 
4666    Input Parameters:
4667 +    A,B - the matrices in mpiaij format
4668 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4669 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4670 .    startsj_r - similar to startsj for receives
4671 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4672 
4673    Output Parameter:
4674 .    B_oth - the sequential matrix generated
4675 
4676     Level: developer
4677 
4678 @*/
4679 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4680 {
4681   VecScatter_MPI_General *gen_to,*gen_from;
4682   PetscErrorCode         ierr;
4683   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4684   Mat_SeqAIJ             *b_oth;
4685   VecScatter             ctx=a->Mvctx;
4686   MPI_Comm               comm=((PetscObject)ctx)->comm;
4687   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4688   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4689   PetscScalar            *rvalues,*svalues;
4690   MatScalar              *b_otha,*bufa,*bufA;
4691   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4692   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4693   MPI_Status             *sstatus,rstatus;
4694   PetscMPIInt            jj;
4695   PetscInt               *cols,sbs,rbs;
4696   PetscScalar            *vals;
4697 
4698   PetscFunctionBegin;
4699   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4700     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4701   }
4702   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4703   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4704 
4705   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4706   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4707   rvalues  = gen_from->values; /* holds the length of receiving row */
4708   svalues  = gen_to->values;   /* holds the length of sending row */
4709   nrecvs   = gen_from->n;
4710   nsends   = gen_to->n;
4711 
4712   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4713   srow     = gen_to->indices;   /* local row index to be sent */
4714   sstarts  = gen_to->starts;
4715   sprocs   = gen_to->procs;
4716   sstatus  = gen_to->sstatus;
4717   sbs      = gen_to->bs;
4718   rstarts  = gen_from->starts;
4719   rprocs   = gen_from->procs;
4720   rbs      = gen_from->bs;
4721 
4722   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4723   if (scall == MAT_INITIAL_MATRIX){
4724     /* i-array */
4725     /*---------*/
4726     /*  post receives */
4727     for (i=0; i<nrecvs; i++){
4728       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4729       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4730       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4731     }
4732 
4733     /* pack the outgoing message */
4734     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4735     sstartsj[0] = 0;  rstartsj[0] = 0;
4736     len = 0; /* total length of j or a array to be sent */
4737     k = 0;
4738     for (i=0; i<nsends; i++){
4739       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4740       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4741       for (j=0; j<nrows; j++) {
4742         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4743         for (l=0; l<sbs; l++){
4744           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4745           rowlen[j*sbs+l] = ncols;
4746           len += ncols;
4747           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4748         }
4749         k++;
4750       }
4751       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4752       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4753     }
4754     /* recvs and sends of i-array are completed */
4755     i = nrecvs;
4756     while (i--) {
4757       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4758     }
4759     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4760 
4761     /* allocate buffers for sending j and a arrays */
4762     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4763     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4764 
4765     /* create i-array of B_oth */
4766     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4767     b_othi[0] = 0;
4768     len = 0; /* total length of j or a array to be received */
4769     k = 0;
4770     for (i=0; i<nrecvs; i++){
4771       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4772       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4773       for (j=0; j<nrows; j++) {
4774         b_othi[k+1] = b_othi[k] + rowlen[j];
4775         len += rowlen[j]; k++;
4776       }
4777       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4778     }
4779 
4780     /* allocate space for j and a arrays of B_oth */
4781     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4782     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4783 
4784     /* j-array */
4785     /*---------*/
4786     /*  post receives of j-array */
4787     for (i=0; i<nrecvs; i++){
4788       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4789       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4790     }
4791 
4792     /* pack the outgoing message j-array */
4793     k = 0;
4794     for (i=0; i<nsends; i++){
4795       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4796       bufJ = bufj+sstartsj[i];
4797       for (j=0; j<nrows; j++) {
4798         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4799         for (ll=0; ll<sbs; ll++){
4800           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4801           for (l=0; l<ncols; l++){
4802             *bufJ++ = cols[l];
4803           }
4804           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4805         }
4806       }
4807       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4808     }
4809 
4810     /* recvs and sends of j-array are completed */
4811     i = nrecvs;
4812     while (i--) {
4813       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4814     }
4815     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4816   } else if (scall == MAT_REUSE_MATRIX){
4817     sstartsj = *startsj;
4818     rstartsj = *startsj_r;
4819     bufa     = *bufa_ptr;
4820     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4821     b_otha   = b_oth->a;
4822   } else {
4823     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4824   }
4825 
4826   /* a-array */
4827   /*---------*/
4828   /*  post receives of a-array */
4829   for (i=0; i<nrecvs; i++){
4830     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4831     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4832   }
4833 
4834   /* pack the outgoing message a-array */
4835   k = 0;
4836   for (i=0; i<nsends; i++){
4837     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4838     bufA = bufa+sstartsj[i];
4839     for (j=0; j<nrows; j++) {
4840       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4841       for (ll=0; ll<sbs; ll++){
4842         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4843         for (l=0; l<ncols; l++){
4844           *bufA++ = vals[l];
4845         }
4846         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4847       }
4848     }
4849     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4850   }
4851   /* recvs and sends of a-array are completed */
4852   i = nrecvs;
4853   while (i--) {
4854     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4855   }
4856   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4857   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4858 
4859   if (scall == MAT_INITIAL_MATRIX){
4860     /* put together the new matrix */
4861     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4862 
4863     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4864     /* Since these are PETSc arrays, change flags to free them as necessary. */
4865     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4866     b_oth->free_a  = PETSC_TRUE;
4867     b_oth->free_ij = PETSC_TRUE;
4868     b_oth->nonew   = 0;
4869 
4870     ierr = PetscFree(bufj);CHKERRQ(ierr);
4871     if (!startsj || !bufa_ptr){
4872       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4873       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the value buffer itself, not the (NULL) output pointer */
4874     } else {
4875       *startsj   = sstartsj;
4876       *startsj_r = rstartsj;
4877       *bufa_ptr  = bufa;
4878     }
4879   }
4880   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4881   PetscFunctionReturn(0);
4882 }
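
/*
   Hedged usage sketch for MatGetBrowsOfAoCols() (names illustrative): the
   communication metadata built on the first call (startsj, startsj_r, bufa)
   is passed back on reuse so only the a-array is communicated again.

      Mat       B_oth;
      PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa = PETSC_NULL;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... the numerical values of B change, pattern unchanged ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/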
4883 
4884 #undef __FUNCT__
4885 #define __FUNCT__ "MatGetCommunicationStructs"
4886 /*@C
4887   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4888 
4889   Not Collective
4890 
4891   Input Parameters:
4892 . A - The matrix in mpiaij format
4893 
4894   Output Parameter:
4895 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4896 . colmap - A map from global column index to local index into lvec
4897 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4898 
4899   Level: developer
4900 
4901 @*/
4902 #if defined (PETSC_USE_CTABLE)
4903 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4904 #else
4905 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4906 #endif
4907 {
4908   Mat_MPIAIJ *a;
4909 
4910   PetscFunctionBegin;
4911   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4912   PetscValidPointer(lvec, 2);
4913   PetscValidPointer(colmap, 3);
4914   PetscValidPointer(multScatter, 4);
4915   a = (Mat_MPIAIJ *) A->data;
4916   if (lvec) *lvec = a->lvec;
4917   if (colmap) *colmap = a->colmap;
4918   if (multScatter) *multScatter = a->Mvctx;
4919   PetscFunctionReturn(0);
4920 }
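
/*
   Hedged usage sketch for MatGetCommunicationStructs() (names illustrative);
   the outputs are references owned by A and must not be destroyed by the caller:

      Vec        lvec;
      VecScatter scat;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
*/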
4921 
4922 EXTERN_C_BEGIN
4923 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4924 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4925 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4926 EXTERN_C_END
4927 
4928 #undef __FUNCT__
4929 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4930 /*
4931     Computes C = A*B as (B'*A')', since no kernel is available to form the MPIDense*MPIAIJ product directly
4932 
4933                n                       p                          p
4934         (              )       (              )         (                  )
4935       m (      A       )  *  n (       B      )   =   m (         C        )
4936         (              )       (              )         (                  )
4937 
4938 */
4939 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4940 {
4941   PetscErrorCode     ierr;
4942   Mat                At,Bt,Ct;
4943 
4944   PetscFunctionBegin;
4945   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4946   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4947   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4948   ierr = MatDestroy(At);CHKERRQ(ierr);
4949   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4950   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4951   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4952   PetscFunctionReturn(0);
4953 }
4954 
4955 #undef __FUNCT__
4956 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4957 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4958 {
4959   PetscErrorCode ierr;
4960   PetscInt       m=A->rmap->n,n=B->cmap->n;
4961   Mat            Cmat;
4962 
4963   PetscFunctionBegin;
4964   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4965   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4966   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4967   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4968   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4969   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4970   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4971   *C   = Cmat;
4972   PetscFunctionReturn(0);
4973 }
4974 
4975 /* ----------------------------------------------------------------*/
4976 #undef __FUNCT__
4977 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4978 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4979 {
4980   PetscErrorCode ierr;
4981 
4982   PetscFunctionBegin;
4983   if (scall == MAT_INITIAL_MATRIX){
4984     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4985   }
4986   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4987   PetscFunctionReturn(0);
4988 }
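
/*
   Hedged usage sketch (names illustrative): with A of type MPIDENSE and B of
   type MPIAIJ, the product is reached through the generic interface, which
   dispatches to MatMatMult_MPIDense_MPIAIJ().

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ... the values of A or B change ...
      ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/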
4989 
4990 EXTERN_C_BEGIN
4991 #if defined(PETSC_HAVE_MUMPS)
4992 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4993 #endif
4994 #if defined(PETSC_HAVE_PASTIX)
4995 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4996 #endif
4997 #if defined(PETSC_HAVE_SUPERLU_DIST)
4998 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4999 #endif
5000 #if defined(PETSC_HAVE_SPOOLES)
5001 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5002 #endif
5003 EXTERN_C_END
5004 
5005 /*MC
5006    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5007 
5008    Options Database Keys:
5009 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5010 
5011   Level: beginner
5012 
5013 .seealso: MatCreateMPIAIJ()
5014 M*/
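
/*
   Hedged usage sketch for creating a MATMPIAIJ matrix (the names and the
   preallocation figures are illustrative):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);    (or -mat_type mpiaij with MatSetFromOptions())
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/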
5015 
5016 EXTERN_C_BEGIN
5017 #undef __FUNCT__
5018 #define __FUNCT__ "MatCreate_MPIAIJ"
5019 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5020 {
5021   Mat_MPIAIJ     *b;
5022   PetscErrorCode ierr;
5023   PetscMPIInt    size;
5024 
5025   PetscFunctionBegin;
5026   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5027 
5028   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5029   B->data         = (void*)b;
5030   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5031   B->rmap->bs     = 1;
5032   B->assembled    = PETSC_FALSE;
5033   B->mapping      = 0;
5034 
5035   B->insertmode   = NOT_SET_VALUES;
5036   b->size         = size;
5037   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5038 
5039   /* build cache for off array entries formed */
5040   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5041   b->donotstash  = PETSC_FALSE;
5042   b->colmap      = 0;
5043   b->garray      = 0;
5044   b->roworiented = PETSC_TRUE;
5045 
5046   /* stuff used for matrix vector multiply */
5047   b->lvec      = PETSC_NULL;
5048   b->Mvctx     = PETSC_NULL;
5049 
5050   /* stuff for MatGetRow() */
5051   b->rowindices   = 0;
5052   b->rowvalues    = 0;
5053   b->getrowactive = PETSC_FALSE;
5054 
5055 #if defined(PETSC_HAVE_SPOOLES)
5056   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5057                                      "MatGetFactor_mpiaij_spooles",
5058                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5059 #endif
5060 #if defined(PETSC_HAVE_MUMPS)
5061   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5062                                      "MatGetFactor_mpiaij_mumps",
5063                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5064 #endif
5065 #if defined(PETSC_HAVE_PASTIX)
5066   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5067 					   "MatGetFactor_mpiaij_pastix",
5068 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5069 #endif
5070 #if defined(PETSC_HAVE_SUPERLU_DIST)
5071   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5072                                      "MatGetFactor_mpiaij_superlu_dist",
5073                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5074 #endif
5075   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5076                                      "MatStoreValues_MPIAIJ",
5077                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5079                                      "MatRetrieveValues_MPIAIJ",
5080                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5082 				     "MatGetDiagonalBlock_MPIAIJ",
5083                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5085 				     "MatIsTranspose_MPIAIJ",
5086 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5088 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5089 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5091 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5092 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5094 				     "MatDiagonalScaleLocal_MPIAIJ",
5095 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5097                                      "MatConvert_MPIAIJ_MPICSRPERM",
5098                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5100                                      "MatConvert_MPIAIJ_MPICRL",
5101                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5103                                      "MatConvert_MPIAIJ_MPISBAIJ",
5104                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5106                                      "MatMatMult_MPIDense_MPIAIJ",
5107                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5109                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5110                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5112                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5113                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5114   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5115   PetscFunctionReturn(0);
5116 }
5117 EXTERN_C_END
5118 
5119 #undef __FUNCT__
5120 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5121 /*@
5122      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5123          and "off-diagonal" part of the matrix in CSR format.
5124 
5125    Collective on MPI_Comm
5126 
5127    Input Parameters:
5128 +  comm - MPI communicator
5129 .  m - number of local rows (Cannot be PETSC_DECIDE)
5130 .  n - This value should be the same as the local size used in creating the
5131        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5132        calculated if N is given) For square matrices n is almost always m.
5133 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5134 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5135 .   i - row indices for "diagonal" portion of matrix
5136 .   j - column indices
5137 .   a - matrix values
5138 .   oi - row indices for "off-diagonal" portion of matrix
5139 .   oj - column indices
5140 -   oa - matrix values
5141 
5142    Output Parameter:
5143 .   mat - the matrix
5144 
5145    Level: advanced
5146 
5147    Notes:
5148        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5149 
5150        The i and j indices are 0 based
5151 
5152        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5153 
5154        This sets local rows and cannot be used to set off-processor values.
5155 
5156        You cannot later use MatSetValues() to change values in this matrix.
5157 
5158 .keywords: matrix, aij, compressed row, sparse, parallel
5159 
5160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5161           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5162 @*/
5163 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5164 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5165 {
5166   PetscErrorCode ierr;
5167   Mat_MPIAIJ     *maij;
5168 
5169   PetscFunctionBegin;
5170   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5171   if (i[0]) {
5172     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5173   }
5174   if (oi[0]) {
5175     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5176   }
5177   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5178   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5179   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5180   maij = (Mat_MPIAIJ*) (*mat)->data;
5181   maij->donotstash     = PETSC_TRUE;
5182   (*mat)->preallocated = PETSC_TRUE;
5183 
5184   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5185   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5186   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5187   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5188 
5189   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5190   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5191 
5192   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5193   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5194   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5195   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5196 
5197   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   PetscFunctionReturn(0);
5200 }
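
/*
   Hedged usage sketch for MatCreateMPIAIJWithSplitArrays() on two processes
   (a worked toy case; all names and values are illustrative). The global 2x2
   matrix [1 2; 3 4] is split so each process owns one row and one column;
   j indexes the local columns of the "diagonal" block, while oj holds global
   column indices. The arrays are NOT copied, so they must outlive the matrix.

      PetscInt    i[2]  = {0,1},j[1]  = {0};   (one diagonal entry per row)
      PetscInt    oi[2] = {0,1},oj[1];         (one off-diagonal entry per row)
      PetscScalar a[1],oa[1];
      Mat         mat;
      if (!rank) {a[0] = 1.0; oj[0] = 1; oa[0] = 2.0;}   (row 0: diag 1, col 1 holds 2)
      else       {a[0] = 4.0; oj[0] = 0; oa[0] = 3.0;}   (row 1: diag 4, col 0 holds 3)
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/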
5201 
5202 /*
5203     Special version for direct calls from Fortran
5204 */
5205 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5206 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5208 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5209 #endif
5210 
5211 /* Change these macros so can be used in void function */
5212 #undef CHKERRQ
5213 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5214 #undef SETERRQ2
5215 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5216 #undef SETERRQ
5217 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5218 
5219 EXTERN_C_BEGIN
5220 #undef __FUNCT__
5221 #define __FUNCT__ "matsetvaluesmpiaij_"
5222 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5223 {
5224   Mat             mat = *mmat;
5225   PetscInt        m = *mm, n = *mn;
5226   InsertMode      addv = *maddv;
5227   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5228   PetscScalar     value;
5229   PetscErrorCode  ierr;
5230 
5231   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5232   if (mat->insertmode == NOT_SET_VALUES) {
5233     mat->insertmode = addv;
5234   }
5235 #if defined(PETSC_USE_DEBUG)
5236   else if (mat->insertmode != addv) {
5237     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5238   }
5239 #endif
5240   {
5241   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5242   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5243   PetscTruth      roworiented = aij->roworiented;
5244 
5245   /* Some Variables required in the macro */
5246   Mat             A = aij->A;
5247   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5248   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5249   MatScalar       *aa = a->a;
5250   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5251   Mat             B = aij->B;
5252   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5253   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5254   MatScalar       *ba = b->a;
5255 
5256   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5257   PetscInt        nonew = a->nonew;
5258   MatScalar       *ap1,*ap2;
5259 
5260   PetscFunctionBegin;
5261   for (i=0; i<m; i++) {
5262     if (im[i] < 0) continue;
5263 #if defined(PETSC_USE_DEBUG)
5264     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5265 #endif
5266     if (im[i] >= rstart && im[i] < rend) {
5267       row      = im[i] - rstart;
5268       lastcol1 = -1;
5269       rp1      = aj + ai[row];
5270       ap1      = aa + ai[row];
5271       rmax1    = aimax[row];
5272       nrow1    = ailen[row];
5273       low1     = 0;
5274       high1    = nrow1;
5275       lastcol2 = -1;
5276       rp2      = bj + bi[row];
5277       ap2      = ba + bi[row];
5278       rmax2    = bimax[row];
5279       nrow2    = bilen[row];
5280       low2     = 0;
5281       high2    = nrow2;
5282 
5283       for (j=0; j<n; j++) {
5284         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5285         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5286         if (in[j] >= cstart && in[j] < cend){
5287           col = in[j] - cstart;
5288           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5289         } else if (in[j] < 0) continue;
5290 #if defined(PETSC_USE_DEBUG)
5291         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5292 #endif
5293         else {
5294           if (mat->was_assembled) {
5295             if (!aij->colmap) {
5296               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5297             }
5298 #if defined (PETSC_USE_CTABLE)
5299             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5300 	    col--;
5301 #else
5302             col = aij->colmap[in[j]] - 1;
5303 #endif
5304             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5305               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5306               col =  in[j];
5307               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5308               B = aij->B;
5309               b = (Mat_SeqAIJ*)B->data;
5310               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5311               rp2      = bj + bi[row];
5312               ap2      = ba + bi[row];
5313               rmax2    = bimax[row];
5314               nrow2    = bilen[row];
5315               low2     = 0;
5316               high2    = nrow2;
5317               bm       = aij->B->rmap->n;
5318               ba = b->a;
5319             }
5320           } else col = in[j];
5321           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5322         }
5323       }
5324     } else {
5325       if (!aij->donotstash) {
5326         if (roworiented) {
5327           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5328         } else {
5329           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5330         }
5331       }
5332     }
5333   }}
5334   PetscFunctionReturnVoid();
5335 }
5336 EXTERN_C_END
5337 
5338