xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7eb7c717d43f3a9849c827bbcf90fcb7afc7e415)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
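/*
   Roughly: process 0 owns the full SeqAIJ matrix; it sends each process its row
   lengths, column indices, and numerical values, and every process then computes its
   diagonal/off-diagonal preallocation and inserts its rows with MatSetValues().

   A minimal usage sketch (for illustration only, not taken from this file), with gmat
   assembled on process 0 and m the desired number of local rows on each process:

       Mat dist;
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
       ... change numerical values of gmat on process 0, keeping the nonzero pattern ...
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/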
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to the other processes */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the diagonal and off-diagonal nonzero counts for each row */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the diagonal and off-diagonal nonzero counts for each row */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
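    /* ld[i] counts the off-diagonal (B) entries of local row i whose global column lies
       to the LEFT of this process's diagonal block; it is stashed in the Mat_MPIAIJ struct
       so the MAT_REUSE_MATRIX path below can split each incoming row of values into its
       B-left, A, and B-right pieces without re-examining column indices */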
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
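    /* each row of the received buffer is laid out as: the entries left of the diagonal
       block, then the diagonal-block (A) entries, then the remaining off-diagonal (B)
       entries; ld[] gives the size of the leading piece, so the copies below can peel
       the three segments apart row by row */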
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
175 /*
176   Local utility routine that creates a mapping from the global column number to the
177   local number in the off-diagonal part of the local storage of the matrix.  When
178   PETSC_USE_CTABLE is defined this is scalable, at a slightly higher hash-table lookup
179   cost; without it, it is not scalable (each process holds an order-N integer array) but
180   access is fast.  The stored value is the local index plus one, so zero means "not present".
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
205 
206 #define CHUNKSIZE   15
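/*
   MatSetValues_SeqAIJ_A_Private() inserts (or adds) a single value into the current row of
   the local diagonal block A.  It assumes the caller has already set rp1/ap1 (pointers to
   the row's column indices and values), nrow1/rmax1 (used and allocated row lengths), and
   the search-window state low1/high1/lastcol1.  It does a short binary search followed by a
   linear scan; when the column is not already present it grows the row's storage via
   MatSeqXAIJReallocateAIJ() and shifts the later entries up so the row stays sorted.
*/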
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
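/* Same insertion logic as MatSetValues_SeqAIJ_A_Private(), but operating on the
   off-diagonal block B through the caller-provided rp2/ap2/nrow2/rmax2/low2/high2 state */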
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
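  /* v holds the complete local row with columns in increasing GLOBAL order: off-diagonal
     entries left of the diagonal block, then the diagonal (A) block, then off-diagonal
     entries to the right; the three memcpys below split it into exactly those pieces */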
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
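/*
   MatSetValues for MPIAIJ: rows owned by this process are inserted directly, with each
   column routed either to the diagonal block A (columns inside [cstart,cend)) or to the
   off-diagonal block B (everything else, translated to local indices through colmap/garray
   once the matrix has previously been assembled).  Rows owned by other processes are placed
   in the stash and shipped to their owners during MatAssemblyBegin/End().
*/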
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some Variables required in the macro */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) {
468     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   }
470   mat->insertmode = addv; /* in case this processor had no cache */
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive vals belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any processor has disassembled; if so we must
516      also disassemble ourselves, in order that we may reassemble. */
517   /*
518      if nonzero structure of submatrix B cannot change then we know that
519      no processor disassembled thus we can skip this stuff
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
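  /* nprocs[2*j] counts how many of the requested rows live on process j and
     nprocs[2*j+1] is a 0/1 flag recording whether a message must be sent to j;
     owner[i] remembers the owning process of rows[i] for the packing loop below */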
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processors of number of messages and max length*/
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
643   source = lens + nrecvs;
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree(lens);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672      Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal, we use separate
674      code so that MatSetValues() is not called for each diagonal entry
675      (which would allocate new memory, causing lots of mallocs and
676      slowing things down).
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually */
776     /* added into yy until the next line, the VecScatterEnd() */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* values actually were received in the Begin() but we need to call this nop */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_COOKIE;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* the 0th process needs as much space as the largest amount any process needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
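  /* for each local row, emit the off-diagonal (B) columns that precede this process's
     column block, then the diagonal (A) columns shifted to global numbering, then the
     remaining B columns, so the file holds each row's columns in ascending global order */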
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscTruth        isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscTruth inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscTruth isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscTruth flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSOR_MPIAIJ"
1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscTruth     hasop;
1216 
1217   PetscFunctionBegin;
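  /* Only "local" (processor-block) relaxations are supported: each sweep applies SOR to the
     diagonal block A, while the coupling to off-process unknowns enters through the updated
     right-hand side bb1 = bb - B*x, with x gathered into lvec by the Mvctx scatter */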
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if (flag == SOR_APPLY_UPPER) {
1223     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224     PetscFunctionReturn(0);
1225   }
1226 
1227   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1228     if (flag & SOR_ZERO_INITIAL_GUESS) {
1229       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1230       its--;
1231     }
1232 
1233     while (its--) {
1234       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236 
1237       /* update rhs: bb1 = bb - B*x */
1238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1240 
1241       /* local sweep */
1242       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1243     }
1244   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1245     if (flag & SOR_ZERO_INITIAL_GUESS) {
1246       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1247       its--;
1248     }
1249     while (its--) {
1250       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253       /* update rhs: bb1 = bb - B*x */
1254       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1255       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1256 
1257       /* local sweep */
1258       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1259     }
1260   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1261     if (flag & SOR_ZERO_INITIAL_GUESS) {
1262       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1263       its--;
1264     }
1265     while (its--) {
1266       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268 
1269       /* update rhs: bb1 = bb - B*x */
1270       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1271       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1272 
1273       /* local sweep */
1274       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1275     }
1276   }  else if (flag & SOR_EISENSTAT) {
1277     Vec         xx1;
1278 
1279     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1280     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1281 
1282     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     if (!mat->diag) {
1285       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1286       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1287     }
1288     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1289     if (hasop) {
1290       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1291     } else {
1292       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1293     }
1294     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1295 
1296     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1297 
1298     /* local sweep */
1299     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1300     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1301     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1302   } else {
1303     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1304   }
1305 
1306   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is so much easier;
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
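  /* gather the statistics from the diagonal (A) and off-diagonal (B) blocks, add them,
     and then either report the local sums or reduce them across the communicator */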
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SYMMETRIC:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   case MAT_STRUCTURALLY_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_HERMITIAN:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_SYMMETRY_ETERNAL:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   default:
1461     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetRow_MPIAIJ"
1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1469 {
1470   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1471   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1472   PetscErrorCode ierr;
1473   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1474   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1475   PetscInt       *cmap,*idx_p;
1476 
1477   PetscFunctionBegin;
1478   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1479   mat->getrowactive = PETSC_TRUE;
1480 
1481   if (!mat->rowvalues && (idx || v)) {
1482     /*
1483         allocate enough space to hold information from the longest row.
1484     */
1485     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1486     PetscInt     max = 1,tmp;
1487     for (i=0; i<matin->rmap->n; i++) {
1488       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1489       if (max < tmp) { max = tmp; }
1490     }
1491     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1492     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1493   }
1494 
1495   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1496   lrow = row - rstart;
1497 
1498   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1499   if (!v)   {pvA = 0; pvB = 0;}
1500   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1501   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1502   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1503   nztot = nzA + nzB;
1504 
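  /* garray maps the compressed local column indices of B to global column numbers; the merged
     row is assembled as: off-diagonal columns below cstart, then the diagonal block, then the rest */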
1505   cmap  = mat->garray;
1506   if (v  || idx) {
1507     if (nztot) {
1508       /* Sort by increasing column numbers, assuming A and B already sorted */
1509       PetscInt imark = -1;
1510       if (v) {
1511         *v = v_p = mat->rowvalues;
1512         for (i=0; i<nzB; i++) {
1513           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1514           else break;
1515         }
1516         imark = i;
1517         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1518         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1519       }
1520       if (idx) {
1521         *idx = idx_p = mat->rowindices;
1522         if (imark > -1) {
1523           for (i=0; i<imark; i++) {
1524             idx_p[i] = cmap[cworkB[i]];
1525           }
1526         } else {
1527           for (i=0; i<nzB; i++) {
1528             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1529             else break;
1530           }
1531           imark = i;
1532         }
1533         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1534         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1535       }
1536     } else {
1537       if (idx) *idx = 0;
1538       if (v)   *v   = 0;
1539     }
1540   }
1541   *nz = nztot;
1542   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1543   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1549 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1550 {
1551   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1552 
1553   PetscFunctionBegin;
1554   if (!aij->getrowactive) {
1555     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1556   }
1557   aij->getrowactive = PETSC_FALSE;
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatNorm_MPIAIJ"
1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1564 {
1565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1566   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1567   PetscErrorCode ierr;
1568   PetscInt       i,j,cstart = mat->cmap->rstart;
1569   PetscReal      sum = 0.0;
1570   MatScalar      *v;
1571 
1572   PetscFunctionBegin;
1573   if (aij->size == 1) {
1574     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1575   } else {
1576     if (type == NORM_FROBENIUS) {
1577       v = amat->a;
1578       for (i=0; i<amat->nz; i++) {
1579 #if defined(PETSC_USE_COMPLEX)
1580         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1581 #else
1582         sum += (*v)*(*v); v++;
1583 #endif
1584       }
1585       v = bmat->a;
1586       for (i=0; i<bmat->nz; i++) {
1587 #if defined(PETSC_USE_COMPLEX)
1588         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1589 #else
1590         sum += (*v)*(*v); v++;
1591 #endif
1592       }
1593       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1594       *norm = sqrt(*norm);
1595     } else if (type == NORM_1) { /* max column norm */
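      /* accumulate the local column sums into a global-length array: diagonal-block columns are
         offset by cstart, off-diagonal columns are mapped through garray; the sums are then
         added across processes and the largest entry is the 1-norm */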
1596       PetscReal *tmp,*tmp2;
1597       PetscInt  *jj,*garray = aij->garray;
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1600       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1601       *norm = 0.0;
1602       v = amat->a; jj = amat->j;
1603       for (j=0; j<amat->nz; j++) {
1604         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1605       }
1606       v = bmat->a; jj = bmat->j;
1607       for (j=0; j<bmat->nz; j++) {
1608         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1609       }
1610       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1611       for (j=0; j<mat->cmap->N; j++) {
1612         if (tmp2[j] > *norm) *norm = tmp2[j];
1613       }
1614       ierr = PetscFree(tmp);CHKERRQ(ierr);
1615       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1616     } else if (type == NORM_INFINITY) { /* max row norm */
1617       PetscReal ntemp = 0.0;
1618       for (j=0; j<aij->A->rmap->n; j++) {
1619         v = amat->a + amat->i[j];
1620         sum = 0.0;
1621         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1622           sum += PetscAbsScalar(*v); v++;
1623         }
1624         v = bmat->a + bmat->i[j];
1625         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1626           sum += PetscAbsScalar(*v); v++;
1627         }
1628         if (sum > ntemp) ntemp = sum;
1629       }
1630       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1631     } else {
1632       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1633     }
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatTranspose_MPIAIJ"
1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1641 {
1642   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1643   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1644   PetscErrorCode ierr;
1645   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1646   PetscInt       cstart=A->cmap->rstart,ncol;
1647   Mat            B;
1648   MatScalar      *array;
1649 
1650   PetscFunctionBegin;
1651   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1652 
1653   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1654   ai = Aloc->i; aj = Aloc->j;
1655   bi = Bloc->i; bj = Bloc->j;
1656   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1657     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1658     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1659     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1660     for (i=0; i<ai[ma]; i++){
1661       d_nnz[aj[i]] ++;
1662       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1663     }
1664 
1665     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1666     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1667     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1668     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1669     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1670   } else {
1671     B = *matout;
1672   }
1673 
1674   /* copy over the A part */
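  /* each local row of A is inserted as one column of the transpose: the aj indices (already
     shifted to global numbering above) become the row indices, and the current global row
     becomes the single column index passed to MatSetValues() */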
1675   array = Aloc->a;
1676   row = A->rmap->rstart;
1677   for (i=0; i<ma; i++) {
1678     ncol = ai[i+1]-ai[i];
1679     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1680     row++; array += ncol; aj += ncol;
1681   }
1682   aj = Aloc->j;
1683   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
1684 
1685   /* copy over the B part */
1686   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1687   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1688   array = Bloc->a;
1689   row = A->rmap->rstart;
1690   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1691   cols_tmp = cols;
1692   for (i=0; i<mb; i++) {
1693     ncol = bi[i+1]-bi[i];
1694     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1695     row++; array += ncol; cols_tmp += ncol;
1696   }
1697   ierr = PetscFree(cols);CHKERRQ(ierr);
1698 
1699   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1702     *matout = B;
1703   } else {
1704     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1712 {
1713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1714   Mat            a = aij->A,b = aij->B;
1715   PetscErrorCode ierr;
1716   PetscInt       s1,s2,s3;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1720   if (rr) {
1721     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1722     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1723     /* Overlap communication with computation. */
1724     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725   }
1726   if (ll) {
1727     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1728     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1729     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1730   }
1731   /* scale  the diagonal block */
1732   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1733 
1734   if (rr) {
1735     /* Do a scatter end and then right scale the off-diagonal block */
1736     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1737     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1738   }
1739 
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1746 {
1747   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1752   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1753   PetscFunctionReturn(0);
1754 }
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1757 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1758 {
1759   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1760   PetscErrorCode ierr;
1761 
1762   PetscFunctionBegin;
1763   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 #undef __FUNCT__
1768 #define __FUNCT__ "MatEqual_MPIAIJ"
1769 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1770 {
1771   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1772   Mat            a,b,c,d;
1773   PetscTruth     flg;
1774   PetscErrorCode ierr;
1775 
1776   PetscFunctionBegin;
1777   a = matA->A; b = matA->B;
1778   c = matB->A; d = matB->B;
1779 
1780   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1781   if (flg) {
1782     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1783   }
1784   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 #undef __FUNCT__
1789 #define __FUNCT__ "MatCopy_MPIAIJ"
1790 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1791 {
1792   PetscErrorCode ierr;
1793   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1794   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1795 
1796   PetscFunctionBegin;
1797   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1798   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1799     /* because of the column compression in the off-processor part of the matrix a->B,
1800        the number of columns in a->B and b->B may be different, hence we cannot call
1801        the MatCopy() directly on the two parts. If need be, we can provide a more
1802        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1803        then copying the submatrices */
1804     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1805   } else {
1806     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1807     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1808   }
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1814 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1815 {
1816   PetscErrorCode ierr;
1817 
1818   PetscFunctionBegin;
1819   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #include "petscblaslapack.h"
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatAXPY_MPIAIJ"
1826 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1827 {
1828   PetscErrorCode ierr;
1829   PetscInt       i;
1830   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1831   PetscBLASInt   bnz,one=1;
1832   Mat_SeqAIJ     *x,*y;
1833 
1834   PetscFunctionBegin;
1835   if (str == SAME_NONZERO_PATTERN) {
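    /* identical nonzero patterns: the value arrays of the A and B blocks line up entry for
       entry, so the update reduces to two dense BLAS axpy calls */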
1836     PetscScalar alpha = a;
1837     x = (Mat_SeqAIJ *)xx->A->data;
1838     y = (Mat_SeqAIJ *)yy->A->data;
1839     bnz = PetscBLASIntCast(x->nz);
1840     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1841     x = (Mat_SeqAIJ *)xx->B->data;
1842     y = (Mat_SeqAIJ *)yy->B->data;
1843     bnz = PetscBLASIntCast(x->nz);
1844     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1845   } else if (str == SUBSET_NONZERO_PATTERN) {
1846     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1847 
1848     x = (Mat_SeqAIJ *)xx->B->data;
1849     y = (Mat_SeqAIJ *)yy->B->data;
1850     if (y->xtoy && y->XtoY != xx->B) {
1851       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1852       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1853     }
1854     if (!y->xtoy) { /* get xtoy */
1855       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1856       y->XtoY = xx->B;
1857       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1858     }
1859     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1860   } else {
1861     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1862   }
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatConjugate_MPIAIJ"
1870 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1871 {
1872 #if defined(PETSC_USE_COMPLEX)
1873   PetscErrorCode ierr;
1874   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1875 
1876   PetscFunctionBegin;
1877   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1878   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1879 #else
1880   PetscFunctionBegin;
1881 #endif
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatRealPart_MPIAIJ"
1887 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1888 {
1889   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1890   PetscErrorCode ierr;
1891 
1892   PetscFunctionBegin;
1893   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1894   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1900 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1901 {
1902   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1903   PetscErrorCode ierr;
1904 
1905   PetscFunctionBegin;
1906   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1907   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #ifdef PETSC_HAVE_PBGL
1912 
1913 #include <boost/parallel/mpi/bsp_process_group.hpp>
1914 #include <boost/graph/distributed/ilu_default_graph.hpp>
1915 #include <boost/graph/distributed/ilu_0_block.hpp>
1916 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1917 #include <boost/graph/distributed/petsc/interface.hpp>
1918 #include <boost/multi_array.hpp>
1919 #include <boost/parallel/distributed_property_map.hpp>
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1923 /*
1924   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1925 */
1926 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1927 {
1928   namespace petsc = boost::distributed::petsc;
1929 
1930   namespace graph_dist = boost::graph::distributed;
1931   using boost::graph::distributed::ilu_default::process_group_type;
1932   using boost::graph::ilu_permuted;
1933 
1934   PetscTruth      row_identity, col_identity;
1935   PetscContainer  c;
1936   PetscInt        m, n, M, N;
1937   PetscErrorCode  ierr;
1938 
1939   PetscFunctionBegin;
1940   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1941   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1942   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1943   if (!row_identity || !col_identity) {
1944     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1945   }
1946 
1947   process_group_type pg;
1948   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1949   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1950   lgraph_type&   level_graph = *lgraph_p;
1951   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1952 
1953   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1954   ilu_permuted(level_graph);
1955 
1956   /* put together the new matrix */
1957   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1958   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1959   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1960   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1961   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1962   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964 
1965   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1966   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1967   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1973 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1974 {
1975   PetscFunctionBegin;
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatSolve_MPIAIJ"
1981 /*
1982   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1983 */
1984 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1985 {
1986   namespace graph_dist = boost::graph::distributed;
1987 
1988   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1989   lgraph_type*   lgraph_p;
1990   PetscContainer c;
1991   PetscErrorCode ierr;
1992 
1993   PetscFunctionBegin;
1994   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1995   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1996   ierr = VecCopy(b, x);CHKERRQ(ierr);
1997 
1998   PetscScalar* array_x;
1999   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2000   PetscInt sx;
2001   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2002 
2003   PetscScalar* array_b;
2004   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2005   PetscInt sb;
2006   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2007 
2008   lgraph_type&   level_graph = *lgraph_p;
2009   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2010 
2011   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2012   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2013                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2014 
2015   typedef boost::iterator_property_map<array_ref_type::iterator,
2016                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2017   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2018                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2019 
2020   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2021 
2022   PetscFunctionReturn(0);
2023 }
2024 #endif
2025 
2026 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2027   PetscInt       nzlocal,nsends,nrecvs;
2028   PetscMPIInt    *send_rank;
2029   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2030   PetscScalar    *sbuf_a,**rbuf_a;
2031   PetscErrorCode (*MatDestroy)(Mat);
2032 } Mat_Redundant;
2033 
2034 #undef __FUNCT__
2035 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2036 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2037 {
2038   PetscErrorCode       ierr;
2039   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2040   PetscInt             i;
2041 
2042   PetscFunctionBegin;
2043   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2044   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2046   for (i=0; i<redund->nrecvs; i++){
2047     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2048     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2049   }
2050   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2051   ierr = PetscFree(redund);CHKERRQ(ierr);
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatDestroy_MatRedundant"
2057 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2058 {
2059   PetscErrorCode  ierr;
2060   PetscContainer  container;
2061   Mat_Redundant   *redund=PETSC_NULL;
2062 
2063   PetscFunctionBegin;
2064   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2065   if (container) {
2066     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2067   } else {
2068     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2069   }
2070   A->ops->destroy = redund->MatDestroy;
2071   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2072   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2073   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2079 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2080 {
2081   PetscMPIInt    rank,size;
2082   MPI_Comm       comm=((PetscObject)mat)->comm;
2083   PetscErrorCode ierr;
2084   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2085   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2086   PetscInt       *rowrange=mat->rmap->range;
2087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2088   Mat            A=aij->A,B=aij->B,C=*matredundant;
2089   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2090   PetscScalar    *sbuf_a;
2091   PetscInt       nzlocal=a->nz+b->nz;
2092   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2093   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2094   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2095   MatScalar      *aworkA,*aworkB;
2096   PetscScalar    *vals;
2097   PetscMPIInt    tag1,tag2,tag3,imdex;
2098   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2099                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2100   MPI_Status     recv_status,*send_status;
2101   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2102   PetscInt       **rbuf_j=PETSC_NULL;
2103   PetscScalar    **rbuf_a=PETSC_NULL;
2104   Mat_Redundant  *redund=PETSC_NULL;
2105   PetscContainer container;
2106 
2107   PetscFunctionBegin;
2108   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2110 
2111   if (reuse == MAT_REUSE_MATRIX) {
2112     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2113     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2114     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2115     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2116     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2117     if (container) {
2118       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2119     } else {
2120       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2121     }
2122     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2123 
2124     nsends    = redund->nsends;
2125     nrecvs    = redund->nrecvs;
2126     send_rank = redund->send_rank; recv_rank = send_rank + size;
2127     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2128     sbuf_j    = redund->sbuf_j;
2129     sbuf_a    = redund->sbuf_a;
2130     rbuf_j    = redund->rbuf_j;
2131     rbuf_a    = redund->rbuf_a;
2132   }
2133 
2134   if (reuse == MAT_INITIAL_MATRIX){
2135     PetscMPIInt  subrank,subsize;
2136     PetscInt     nleftover,np_subcomm;
2137     /* get the destination processors' id send_rank, nsends and nrecvs */
2138     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2139     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2140     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2141     recv_rank = send_rank + size;
2142     np_subcomm = size/nsubcomm;
2143     nleftover  = size - nsubcomm*np_subcomm;
2144     nsends = 0; nrecvs = 0;
2145     for (i=0; i<size; i++){ /* i=rank*/
2146       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2147         send_rank[nsends] = i; nsends++;
2148         recv_rank[nrecvs++] = i;
2149       }
2150     }
2151     if (rank >= size - nleftover){/* this proc is a leftover processor */
2152       i = size-nleftover-1;
2153       j = 0;
2154       while (j < nsubcomm - nleftover){
2155         send_rank[nsends++] = i;
2156         i--; j++;
2157       }
2158     }
2159 
2160     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2161       for (i=0; i<nleftover; i++){
2162         recv_rank[nrecvs++] = size-nleftover+i;
2163       }
2164     }
2165 
2166     /* allocate sbuf_j, sbuf_a */
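    /* sbuf_j holds the row pointers (rend-rstart+1 entries) followed by the global column
       indices of all local rows; sbuf_a holds the matching values in the same order */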
2167     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2168     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2169     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2170   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2171 
2172   /* copy mat's local entries into the buffers */
2173   if (reuse == MAT_INITIAL_MATRIX){
2174     rownz_max = 0;
2175     rptr = sbuf_j;
2176     cols = sbuf_j + rend-rstart + 1;
2177     vals = sbuf_a;
2178     rptr[0] = 0;
2179     for (i=0; i<rend-rstart; i++){
2180       row = i + rstart;
2181       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2182       ncols  = nzA + nzB;
2183       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2184       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2185       /* load the column indices for this row into cols */
2186       lwrite = 0;
2187       for (l=0; l<nzB; l++) {
2188         if ((ctmp = bmap[cworkB[l]]) < cstart){
2189           vals[lwrite]   = aworkB[l];
2190           cols[lwrite++] = ctmp;
2191         }
2192       }
2193       for (l=0; l<nzA; l++){
2194         vals[lwrite]   = aworkA[l];
2195         cols[lwrite++] = cstart + cworkA[l];
2196       }
2197       for (l=0; l<nzB; l++) {
2198         if ((ctmp = bmap[cworkB[l]]) >= cend){
2199           vals[lwrite]   = aworkB[l];
2200           cols[lwrite++] = ctmp;
2201         }
2202       }
2203       vals += ncols;
2204       cols += ncols;
2205       rptr[i+1] = rptr[i] + ncols;
2206       if (rownz_max < ncols) rownz_max = ncols;
2207     }
2208     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2209   } else { /* only copy matrix values into sbuf_a */
2210     rptr = sbuf_j;
2211     vals = sbuf_a;
2212     rptr[0] = 0;
2213     for (i=0; i<rend-rstart; i++){
2214       row = i + rstart;
2215       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2216       ncols  = nzA + nzB;
2217       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2218       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2219       lwrite = 0;
2220       for (l=0; l<nzB; l++) {
2221         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2222       }
2223       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2224       for (l=0; l<nzB; l++) {
2225         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2226       }
2227       vals += ncols;
2228       rptr[i+1] = rptr[i] + ncols;
2229     }
2230   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2231 
2232   /* send nzlocal to others, and recv other's nzlocal */
2233   /*--------------------------------------------------*/
2234   if (reuse == MAT_INITIAL_MATRIX){
2235     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2236     s_waits2 = s_waits3 + nsends;
2237     s_waits1 = s_waits2 + nsends;
2238     r_waits1 = s_waits1 + nsends;
2239     r_waits2 = r_waits1 + nrecvs;
2240     r_waits3 = r_waits2 + nrecvs;
2241   } else {
2242     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2243     r_waits3 = s_waits3 + nsends;
2244   }
2245 
2246   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2247   if (reuse == MAT_INITIAL_MATRIX){
2248     /* get new tags to keep the communication clean */
2249     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2250     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2251     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2252     rbuf_nz = sbuf_nz + nsends;
2253 
2254     /* post receives of other's nzlocal */
2255     for (i=0; i<nrecvs; i++){
2256       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2257     }
2258     /* send nzlocal to others */
2259     for (i=0; i<nsends; i++){
2260       sbuf_nz[i] = nzlocal;
2261       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2262     }
2263     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2264     count = nrecvs;
2265     while (count) {
2266       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2267       recv_rank[imdex] = recv_status.MPI_SOURCE;
2268       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2269       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2270 
2271       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2272       rbuf_nz[imdex] += i + 2;
2273       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2274       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2275       count--;
2276     }
2277     /* wait on sends of nzlocal */
2278     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2279     /* send mat->i,j to others, and recv from other's */
2280     /*------------------------------------------------*/
2281     for (i=0; i<nsends; i++){
2282       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2283       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2284     }
2285     /* wait on receives of mat->i,j */
2286     /*------------------------------*/
2287     count = nrecvs;
2288     while (count) {
2289       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2290       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2291       count--;
2292     }
2293     /* wait on sends of mat->i,j */
2294     /*---------------------------*/
2295     if (nsends) {
2296       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2297     }
2298   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2299 
2300   /* post receives, send and receive mat->a */
2301   /*----------------------------------------*/
2302   for (imdex=0; imdex<nrecvs; imdex++) {
2303     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2304   }
2305   for (i=0; i<nsends; i++){
2306     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2307   }
2308   count = nrecvs;
2309   while (count) {
2310     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2311     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2312     count--;
2313   }
2314   if (nsends) {
2315     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2316   }
2317 
2318   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2319 
2320   /* create redundant matrix */
2321   /*-------------------------*/
2322   if (reuse == MAT_INITIAL_MATRIX){
2323     /* compute rownz_max for preallocation */
2324     for (imdex=0; imdex<nrecvs; imdex++){
2325       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2326       rptr = rbuf_j[imdex];
2327       for (i=0; i<j; i++){
2328         ncols = rptr[i+1] - rptr[i];
2329         if (rownz_max < ncols) rownz_max = ncols;
2330       }
2331     }
2332 
2333     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2334     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2335     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2336     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2337     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338   } else {
2339     C = *matredundant;
2340   }
2341 
2342   /* insert local matrix entries */
2343   rptr = sbuf_j;
2344   cols = sbuf_j + rend-rstart + 1;
2345   vals = sbuf_a;
2346   for (i=0; i<rend-rstart; i++){
2347     row   = i + rstart;
2348     ncols = rptr[i+1] - rptr[i];
2349     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2350     vals += ncols;
2351     cols += ncols;
2352   }
2353   /* insert received matrix entries */
2354   for (imdex=0; imdex<nrecvs; imdex++){
2355     rstart = rowrange[recv_rank[imdex]];
2356     rend   = rowrange[recv_rank[imdex]+1];
2357     rptr = rbuf_j[imdex];
2358     cols = rbuf_j[imdex] + rend-rstart + 1;
2359     vals = rbuf_a[imdex];
2360     for (i=0; i<rend-rstart; i++){
2361       row   = i + rstart;
2362       ncols = rptr[i+1] - rptr[i];
2363       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2364       vals += ncols;
2365       cols += ncols;
2366     }
2367   }
2368   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2369   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2371   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2372   if (reuse == MAT_INITIAL_MATRIX){
2373     PetscContainer container;
2374     *matredundant = C;
2375     /* create a supporting struct and attach it to C for reuse */
2376     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2377     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2378     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2379     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2380     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2381 
2382     redund->nzlocal = nzlocal;
2383     redund->nsends  = nsends;
2384     redund->nrecvs  = nrecvs;
2385     redund->send_rank = send_rank;
2386     redund->sbuf_nz = sbuf_nz;
2387     redund->sbuf_j  = sbuf_j;
2388     redund->sbuf_a  = sbuf_a;
2389     redund->rbuf_j  = rbuf_j;
2390     redund->rbuf_a  = rbuf_a;
2391 
2392     redund->MatDestroy = C->ops->destroy;
2393     C->ops->destroy    = MatDestroy_MatRedundant;
2394   }
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2400 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2401 {
2402   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2403   PetscErrorCode ierr;
2404   PetscInt       i,*idxb = 0;
2405   PetscScalar    *va,*vb;
2406   Vec            vtmp;
2407 
2408   PetscFunctionBegin;
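  /* take the row maxima of the diagonal block first, then compare against the off-diagonal
     block and map any winning index through garray back to a global column number */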
2409   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2410   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2411   if (idx) {
2412     for (i=0; i<A->rmap->n; i++) {
2413       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2414     }
2415   }
2416 
2417   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2418   if (idx) {
2419     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2420   }
2421   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2422   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2423 
2424   for (i=0; i<A->rmap->n; i++){
2425     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2426       va[i] = vb[i];
2427       if (idx) idx[i] = a->garray[idxb[i]];
2428     }
2429   }
2430 
2431   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2432   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2433   if (idxb) {
2434     ierr = PetscFree(idxb);CHKERRQ(ierr);
2435   }
2436   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2442 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2443 {
2444   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2445   PetscErrorCode ierr;
2446   PetscInt       i,*idxb = 0;
2447   PetscScalar    *va,*vb;
2448   Vec            vtmp;
2449 
2450   PetscFunctionBegin;
2451   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2452   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2453   if (idx) {
2454     for (i=0; i<A->rmap->n; i++) {
2455       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2456     }
2457   }
2458 
2459   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2460   if (idx) {
2461     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2462   }
2463   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2464   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2465 
2466   for (i=0; i<A->rmap->n; i++){
2467     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2468       va[i] = vb[i];
2469       if (idx) idx[i] = a->garray[idxb[i]];
2470     }
2471   }
2472 
2473   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2474   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2475   if (idxb) {
2476     ierr = PetscFree(idxb);CHKERRQ(ierr);
2477   }
2478   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2484 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2485 {
2486   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2487   PetscInt       n      = A->rmap->n;
2488   PetscInt       cstart = A->cmap->rstart;
2489   PetscInt      *cmap   = mat->garray;
2490   PetscInt      *diagIdx, *offdiagIdx;
2491   Vec            diagV, offdiagV;
2492   PetscScalar   *a, *diagA, *offdiagA;
2493   PetscInt       r;
2494   PetscErrorCode ierr;
2495 
2496   PetscFunctionBegin;
2497   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2498   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2499   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2500   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2501   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2502   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2503   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2504   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2505   for(r = 0; r < n; ++r) {
2506     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2507       a[r]   = diagA[r];
2508       idx[r] = cstart + diagIdx[r];
2509     } else {
2510       a[r]   = offdiagA[r];
2511       idx[r] = cmap[offdiagIdx[r]];
2512     }
2513   }
2514   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2515   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2516   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2517   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2518   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2519   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 #undef __FUNCT__
2524 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2525 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2526 {
2527   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2528   PetscInt       n      = A->rmap->n;
2529   PetscInt       cstart = A->cmap->rstart;
2530   PetscInt      *cmap   = mat->garray;
2531   PetscInt      *diagIdx, *offdiagIdx;
2532   Vec            diagV, offdiagV;
2533   PetscScalar   *a, *diagA, *offdiagA;
2534   PetscInt       r;
2535   PetscErrorCode ierr;
2536 
2537   PetscFunctionBegin;
2538   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2539   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2540   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2541   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2542   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2543   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2544   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2545   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2546   for(r = 0; r < n; ++r) {
2547     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2548       a[r]   = diagA[r];
2549       idx[r] = cstart + diagIdx[r];
2550     } else {
2551       a[r]   = offdiagA[r];
2552       idx[r] = cmap[offdiagIdx[r]];
2553     }
2554   }
2555   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2556   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2557   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2558   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2559   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2560   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2566 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2567 {
2568   PetscErrorCode ierr;
2569   Mat            *dummy;
2570 
2571   PetscFunctionBegin;
2572   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2573   *newmat = *dummy;
2574   ierr = PetscFree(dummy);CHKERRQ(ierr);
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2579 /* -------------------------------------------------------------------*/
2580 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2581        MatGetRow_MPIAIJ,
2582        MatRestoreRow_MPIAIJ,
2583        MatMult_MPIAIJ,
2584 /* 4*/ MatMultAdd_MPIAIJ,
2585        MatMultTranspose_MPIAIJ,
2586        MatMultTransposeAdd_MPIAIJ,
2587 #ifdef PETSC_HAVE_PBGL
2588        MatSolve_MPIAIJ,
2589 #else
2590        0,
2591 #endif
2592        0,
2593        0,
2594 /*10*/ 0,
2595        0,
2596        0,
2597        MatSOR_MPIAIJ,
2598        MatTranspose_MPIAIJ,
2599 /*15*/ MatGetInfo_MPIAIJ,
2600        MatEqual_MPIAIJ,
2601        MatGetDiagonal_MPIAIJ,
2602        MatDiagonalScale_MPIAIJ,
2603        MatNorm_MPIAIJ,
2604 /*20*/ MatAssemblyBegin_MPIAIJ,
2605        MatAssemblyEnd_MPIAIJ,
2606        MatSetOption_MPIAIJ,
2607        MatZeroEntries_MPIAIJ,
2608 /*24*/ MatZeroRows_MPIAIJ,
2609        0,
2610 #ifdef PETSC_HAVE_PBGL
2611        0,
2612 #else
2613        0,
2614 #endif
2615        0,
2616        0,
2617 /*29*/ MatSetUpPreallocation_MPIAIJ,
2618 #ifdef PETSC_HAVE_PBGL
2619        0,
2620 #else
2621        0,
2622 #endif
2623        0,
2624        0,
2625        0,
2626 /*34*/ MatDuplicate_MPIAIJ,
2627        0,
2628        0,
2629        0,
2630        0,
2631 /*39*/ MatAXPY_MPIAIJ,
2632        MatGetSubMatrices_MPIAIJ,
2633        MatIncreaseOverlap_MPIAIJ,
2634        MatGetValues_MPIAIJ,
2635        MatCopy_MPIAIJ,
2636 /*44*/ MatGetRowMax_MPIAIJ,
2637        MatScale_MPIAIJ,
2638        0,
2639        0,
2640        0,
2641 /*49*/ MatSetBlockSize_MPIAIJ,
2642        0,
2643        0,
2644        0,
2645        0,
2646 /*54*/ MatFDColoringCreate_MPIAIJ,
2647        0,
2648        MatSetUnfactored_MPIAIJ,
2649        MatPermute_MPIAIJ,
2650        0,
2651 /*59*/ MatGetSubMatrix_MPIAIJ,
2652        MatDestroy_MPIAIJ,
2653        MatView_MPIAIJ,
2654        0,
2655        0,
2656 /*64*/ 0,
2657        0,
2658        0,
2659        0,
2660        0,
2661 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2662        MatGetRowMinAbs_MPIAIJ,
2663        0,
2664        MatSetColoring_MPIAIJ,
2665 #if defined(PETSC_HAVE_ADIC)
2666        MatSetValuesAdic_MPIAIJ,
2667 #else
2668        0,
2669 #endif
2670        MatSetValuesAdifor_MPIAIJ,
2671 /*75*/ MatFDColoringApply_AIJ,
2672        0,
2673        0,
2674        0,
2675        0,
2676 /*80*/ 0,
2677        0,
2678        0,
2679 /*83*/ MatLoad_MPIAIJ,
2680        0,
2681        0,
2682        0,
2683        0,
2684        0,
2685 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2686        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2687        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2688        MatPtAP_Basic,
2689        MatPtAPSymbolic_MPIAIJ,
2690 /*94*/ MatPtAPNumeric_MPIAIJ,
2691        0,
2692        0,
2693        0,
2694        0,
2695 /*99*/ 0,
2696        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2697        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2698        MatConjugate_MPIAIJ,
2699        0,
2700 /*104*/MatSetValuesRow_MPIAIJ,
2701        MatRealPart_MPIAIJ,
2702        MatImaginaryPart_MPIAIJ,
2703        0,
2704        0,
2705 /*109*/0,
2706        MatGetRedundantMatrix_MPIAIJ,
2707        MatGetRowMin_MPIAIJ,
2708        0,
2709        0,
2710 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2711        0,
2712        0,
2713        0,
2714        0,
2715        0
2716 };
2717 
2718 /* ----------------------------------------------------------------------------------------*/
2719 
2720 EXTERN_C_BEGIN
2721 #undef __FUNCT__
2722 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2723 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2724 {
2725   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2726   PetscErrorCode ierr;
2727 
2728   PetscFunctionBegin;
2729   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2730   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 EXTERN_C_END
2734 
2735 EXTERN_C_BEGIN
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2738 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2739 {
2740   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2745   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 EXTERN_C_END
2749 
2750 #include "petscpc.h"
2751 EXTERN_C_BEGIN
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2754 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2755 {
2756   Mat_MPIAIJ     *b;
2757   PetscErrorCode ierr;
2758   PetscInt       i;
2759 
2760   PetscFunctionBegin;
2761   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2762   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2763   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2764   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2765 
2766   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2767   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2768   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2769   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2770   if (d_nnz) {
2771     for (i=0; i<B->rmap->n; i++) {
2772       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2773     }
2774   }
2775   if (o_nnz) {
2776     for (i=0; i<B->rmap->n; i++) {
2777       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2778     }
2779   }
2780   b = (Mat_MPIAIJ*)B->data;
2781 
2782   if (!B->preallocated) {
2783     /* Explicitly create 2 MATSEQAIJ matrices. */
2784     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2785     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2786     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2787     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2788     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2789     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2790     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2791     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2792   }
2793 
2794   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2795   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2796   B->preallocated = PETSC_TRUE;
2797   PetscFunctionReturn(0);
2798 }
2799 EXTERN_C_END
2800 
2801 #undef __FUNCT__
2802 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2803 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2804 {
2805   Mat            mat;
2806   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2807   PetscErrorCode ierr;
2808 
2809   PetscFunctionBegin;
2810   *newmat       = 0;
2811   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2812   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2813   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2814   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2815   a    = (Mat_MPIAIJ*)mat->data;
2816 
2817   mat->factor       = matin->factor;
2818   mat->rmap->bs      = matin->rmap->bs;
2819   mat->assembled    = PETSC_TRUE;
2820   mat->insertmode   = NOT_SET_VALUES;
2821   mat->preallocated = PETSC_TRUE;
2822 
2823   a->size           = oldmat->size;
2824   a->rank           = oldmat->rank;
2825   a->donotstash     = oldmat->donotstash;
2826   a->roworiented    = oldmat->roworiented;
2827   a->rowindices     = 0;
2828   a->rowvalues      = 0;
2829   a->getrowactive   = PETSC_FALSE;
2830 
2831   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2832   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2833 
2834   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2835   if (oldmat->colmap) {
2836 #if defined (PETSC_USE_CTABLE)
2837     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2838 #else
2839     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2840     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2841     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2842 #endif
2843   } else a->colmap = 0;
2844   if (oldmat->garray) {
2845     PetscInt len;
2846     len  = oldmat->B->cmap->n;
2847     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2848     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2849     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2850   } else a->garray = 0;
2851 
2852   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2853   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2854   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2855   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2856   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2857   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2858   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2859   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2860   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2861   *newmat = mat;
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 #include "petscsys.h"
2866 
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatLoad_MPIAIJ"
2869 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2870 {
2871   Mat            A;
2872   PetscScalar    *vals,*svals;
2873   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2874   MPI_Status     status;
2875   PetscErrorCode ierr;
2876   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2877   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2878   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2879   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2880   PetscInt       cend,cstart,n,*rowners;
2881   int            fd;
2882 
2883   PetscFunctionBegin;
2884   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2885   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2886   if (!rank) {
2887     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2888     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2889     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2890   }
2891 
2892   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2893   M = header[1]; N = header[2];
2894   /* determine ownership of all rows */
2895   m    = M/size + ((M % size) > rank);
2896   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2897   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2898 
2899   /* First process needs enough room for process with most rows */
2900   if (!rank) {
2901     mmax       = rowners[1];
2902     for (i=2; i<=size; i++) {
2903       mmax = PetscMax(mmax,rowners[i]);
2904     }
2905   } else mmax = m;
2906 
2907   rowners[0] = 0;
2908   for (i=2; i<=size; i++) {
2909     rowners[i] += rowners[i-1];
2910   }
2911   rstart = rowners[rank];
2912   rend   = rowners[rank+1];
2913 
2914   /* distribute row lengths to all processors */
2915   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2916   if (!rank) {
2917     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2918     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2919     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2920     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2921     for (j=0; j<m; j++) {
2922       procsnz[0] += ourlens[j];
2923     }
2924     for (i=1; i<size; i++) {
2925       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2926       /* calculate the number of nonzeros on each processor */
2927       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2928         procsnz[i] += rowlengths[j];
2929       }
2930       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2931       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2932     }
2933     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2934   } else {
2935     mpicnt = PetscMPIIntCast(m);
2936     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2937   }
2938 
2939   if (!rank) {
2940     /* determine max buffer needed and allocate it */
2941     maxnz = 0;
2942     for (i=0; i<size; i++) {
2943       maxnz = PetscMax(maxnz,procsnz[i]);
2944     }
2945     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2946 
2947     /* read in my part of the matrix column indices  */
2948     nz   = procsnz[0];
2949     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2950     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2951 
2952     /* read in everyone else's and ship them off */
2953     for (i=1; i<size; i++) {
2954       nz     = procsnz[i];
2955       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2956       mpicnt = PetscMPIIntCast(nz);
2957       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2958     }
2959     ierr = PetscFree(cols);CHKERRQ(ierr);
2960   } else {
2961     /* determine buffer space needed for message */
2962     nz = 0;
2963     for (i=0; i<m; i++) {
2964       nz += ourlens[i];
2965     }
2966     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2967 
2968     /* receive message of column indices*/
2969     mpicnt = PetscMPIIntCast(nz);
2970     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2971     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2972     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2973     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2974     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2975   }
2976 
2977   /* determine column ownership if matrix is not square */
2978   if (N != M) {
2979     n      = N/size + ((N % size) > rank);
2980     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2981     cstart = cend - n;
2982   } else {
2983     cstart = rstart;
2984     cend   = rend;
2985     n      = cend - cstart;
2986   }
2987 
2988   /* loop over local rows, determining number of off diagonal entries */
2989   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2990   jj = 0;
2991   for (i=0; i<m; i++) {
2992     for (j=0; j<ourlens[i]; j++) {
2993       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2994       jj++;
2995     }
2996   }
2997 
2998   /* create our matrix */
2999   for (i=0; i<m; i++) {
3000     ourlens[i] -= offlens[i];
3001   }
3002   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3003   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
3004   ierr = MatSetType(A,type);CHKERRQ(ierr);
3005   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3006 
3007   for (i=0; i<m; i++) {
3008     ourlens[i] += offlens[i];
3009   }
3010 
3011   if (!rank) {
3012     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3013 
3014     /* read in my part of the matrix numerical values  */
3015     nz   = procsnz[0];
3016     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3017 
3018     /* insert into matrix */
3019     jj      = rstart;
3020     smycols = mycols;
3021     svals   = vals;
3022     for (i=0; i<m; i++) {
3023       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3024       smycols += ourlens[i];
3025       svals   += ourlens[i];
3026       jj++;
3027     }
3028 
3029     /* read in other processors and ship out */
3030     for (i=1; i<size; i++) {
3031       nz     = procsnz[i];
3032       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3033       mpicnt = PetscMPIIntCast(nz);
3034       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3035     }
3036     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3037   } else {
3038     /* receive numeric values */
3039     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3040 
3041     /* receive message of values*/
3042     mpicnt = PetscMPIIntCast(nz);
3043     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3044     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3045     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3046     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3047     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3048 
3049     /* insert into matrix */
3050     jj      = rstart;
3051     smycols = mycols;
3052     svals   = vals;
3053     for (i=0; i<m; i++) {
3054       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3055       smycols += ourlens[i];
3056       svals   += ourlens[i];
3057       jj++;
3058     }
3059   }
3060   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3061   ierr = PetscFree(vals);CHKERRQ(ierr);
3062   ierr = PetscFree(mycols);CHKERRQ(ierr);
3063   ierr = PetscFree(rowners);CHKERRQ(ierr);
3064 
3065   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3066   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   *newmat = A;
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3073 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3074 {
3075   PetscErrorCode ierr;
3076   IS             iscol_local;
3077   PetscInt       csize;
3078 
3079   PetscFunctionBegin;
3080   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3081   if (call == MAT_REUSE_MATRIX) {
3082     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3083     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3084   } else {
3085     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3086   }
3087   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3088   if (call == MAT_INITIAL_MATRIX) {
3089     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3090     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3091   }
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3097 /*
3098     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3099   matrix, and then the final result obtained by concatenating the local matrices.
3100   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3101 
3102   Note: This requires a sequential iscol with all indices.
3103 */
3104 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3105 {
3106   PetscErrorCode ierr;
3107   PetscMPIInt    rank,size;
3108   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3109   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3110   Mat            *local,M,Mreuse;
3111   MatScalar      *vwork,*aa;
3112   MPI_Comm       comm = ((PetscObject)mat)->comm;
3113   Mat_SeqAIJ     *aij;
3114 
3115 
3116   PetscFunctionBegin;
3117   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3118   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3119 
3120   if (call ==  MAT_REUSE_MATRIX) {
3121     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3122     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3123     local = &Mreuse;
3124     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3125   } else {
3126     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3127     Mreuse = *local;
3128     ierr   = PetscFree(local);CHKERRQ(ierr);
3129   }
3130 
3131   /*
3132       m - number of local rows
3133       n - number of columns (same on all processors)
3134       rstart - first row in new global matrix generated
3135   */
3136   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3137   if (call == MAT_INITIAL_MATRIX) {
3138     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3139     ii  = aij->i;
3140     jj  = aij->j;
3141 
3142     /*
3143         Determine the number of non-zeros in the diagonal and off-diagonal
3144         portions of the matrix in order to do correct preallocation
3145     */
3146 
3147     /* first get start and end of "diagonal" columns */
3148     if (csize == PETSC_DECIDE) {
3149       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3150       if (mglobal == n) { /* square matrix */
3151 	nlocal = m;
3152       } else {
3153         nlocal = n/size + ((n % size) > rank);
3154       }
3155     } else {
3156       nlocal = csize;
3157     }
3158     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3159     rstart = rend - nlocal;
3160     if (rank == size - 1 && rend != n) {
3161       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3162     }
3163 
3164     /* next, compute all the lengths */
3165     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3166     olens = dlens + m;
3167     for (i=0; i<m; i++) {
3168       jend = ii[i+1] - ii[i];
3169       olen = 0;
3170       dlen = 0;
3171       for (j=0; j<jend; j++) {
3172         if (*jj < rstart || *jj >= rend) olen++;
3173         else dlen++;
3174         jj++;
3175       }
3176       olens[i] = olen;
3177       dlens[i] = dlen;
3178     }
3179     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3180     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3181     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3182     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3183     ierr = PetscFree(dlens);CHKERRQ(ierr);
3184   } else {
3185     PetscInt ml,nl;
3186 
3187     M = *newmat;
3188     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3189     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3190     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3191     /*
3192          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3193        rather than the slower MatSetValues().
3194     */
3195     M->was_assembled = PETSC_TRUE;
3196     M->assembled     = PETSC_FALSE;
3197   }
3198   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3199   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3200   ii  = aij->i;
3201   jj  = aij->j;
3202   aa  = aij->a;
3203   for (i=0; i<m; i++) {
3204     row   = rstart + i;
3205     nz    = ii[i+1] - ii[i];
3206     cwork = jj;     jj += nz;
3207     vwork = aa;     aa += nz;
3208     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3209   }
3210 
3211   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3212   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3213   *newmat = M;
3214 
3215   /* save submatrix used in processor for next request */
3216   if (call ==  MAT_INITIAL_MATRIX) {
3217     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3218     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3219   }
3220 
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 EXTERN_C_BEGIN
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3227 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3228 {
3229   PetscInt       m,cstart, cend,j,nnz,i,d;
3230   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3231   const PetscInt *JJ;
3232   PetscScalar    *values;
3233   PetscErrorCode ierr;
3234 
3235   PetscFunctionBegin;
3236   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3237 
3238   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3239   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3240   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3241   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3242   m      = B->rmap->n;
3243   cstart = B->cmap->rstart;
3244   cend   = B->cmap->rend;
3245   rstart = B->rmap->rstart;
3246 
3247   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3248   o_nnz = d_nnz + m;
3249 
3250 #if defined(PETSC_USE_DEBUG)
3251   for (i=0; i<m; i++) {
3252     nnz     = Ii[i+1]- Ii[i];
3253     JJ      = J + Ii[i];
3254     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3255     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3256     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3257     for (j=1; j<nnz; j++) {
3258       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3259     }
3260   }
3261 #endif
3262 
3263   for (i=0; i<m; i++) {
3264     nnz     = Ii[i+1]- Ii[i];
3265     JJ      = J + Ii[i];
3266     nnz_max = PetscMax(nnz_max,nnz);
3267     for (j=0; j<nnz; j++) {
3268       if (*JJ >= cstart) break;
3269       JJ++;
3270     }
3271     d = 0;
3272     for (; j<nnz; j++) {
3273       if (*JJ++ >= cend) break;
3274       d++;
3275     }
3276     d_nnz[i] = d;
3277     o_nnz[i] = nnz - d;
3278   }
3279   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3280   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3281 
3282   if (v) values = (PetscScalar*)v;
3283   else {
3284     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3285     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3286   }
3287 
3288   for (i=0; i<m; i++) {
3289     ii   = i + rstart;
3290     nnz  = Ii[i+1]- Ii[i];
3291     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3292   }
3293   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3294   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3295 
3296   if (!v) {
3297     ierr = PetscFree(values);CHKERRQ(ierr);
3298   }
3299   PetscFunctionReturn(0);
3300 }
3301 EXTERN_C_END
3302 
3303 #undef __FUNCT__
3304 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3305 /*@
3306    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3307    (the default parallel PETSc format).
3308 
3309    Collective on MPI_Comm
3310 
3311    Input Parameters:
3312 +  B - the matrix
3313 .  i - the indices into j for the start of each local row (starts with zero)
3314 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3315 -  v - optional values in the matrix
3316 
3317    Level: developer
3318 
3319    Notes:
3320        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3321      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3322      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3323 
3324        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3325 
3326        The format used for the sparse matrix input is equivalent to a
3327     row-major ordering, i.e. for the following matrix, the input data expected is
3328     as shown:
3329 
3330         1 0 0
3331         2 0 3     P0
3332        -------
3333         4 5 6     P1
3334 
3335      Process0 [P0]: rows_owned=[0,1]
3336         i =  {0,1,3}  [size = nrow+1  = 2+1]
3337         j =  {0,0,2}  [size = nz = 3]
3338         v =  {1,2,3}  [size = nz = 3]
3339 
3340      Process1 [P1]: rows_owned=[2]
3341         i =  {0,3}    [size = nrow+1  = 1+1]
3342         j =  {0,1,2}  [size = nz = 3]
3343         v =  {4,5,6}  [size = nz = 3]
3344 
3345       The column indices for each row MUST be sorted.
3346 
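      Example usage (a minimal sketch, not a complete program; comm, nlocal, and the
   i, j, v arrays are assumed to already hold this process' CSR data laid out as above):

.vb
      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
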
3347 .keywords: matrix, aij, compressed row, sparse, parallel
3348 
3349 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3350           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3351 @*/
3352 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3353 {
3354   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3355 
3356   PetscFunctionBegin;
3357   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3358   if (f) {
3359     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3360   }
3361   PetscFunctionReturn(0);
3362 }
3363 
3364 #undef __FUNCT__
3365 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3366 /*@C
3367    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3368    (the default parallel PETSc format).  For good matrix assembly performance
3369    the user should preallocate the matrix storage by setting the parameters
3370    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3371    performance can be increased by more than a factor of 50.
3372 
3373    Collective on MPI_Comm
3374 
3375    Input Parameters:
3376 +  A - the matrix
3377 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3378            (same value is used for all local rows)
3379 .  d_nnz - array containing the number of nonzeros in the various rows of the
3380            DIAGONAL portion of the local submatrix (possibly different for each row)
3381            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3382            The size of this array is equal to the number of local rows, i.e 'm'.
3383            You must leave room for the diagonal entry even if it is zero.
3384 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3385            submatrix (same value is used for all local rows).
3386 -  o_nnz - array containing the number of nonzeros in the various rows of the
3387            OFF-DIAGONAL portion of the local submatrix (possibly different for
3388            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3389            structure. The size of this array is equal to the number
3390            of local rows, i.e 'm'.
3391 
3392    If the *_nnz parameter is given then the *_nz parameter is ignored
3393 
3394    The AIJ format (also called the Yale sparse matrix format or
3395    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3396    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3397 
3398    The parallel matrix is partitioned such that the first m0 rows belong to
3399    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3400    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3401 
3402    The DIAGONAL portion of the local submatrix of a processor can be defined
3403    as the submatrix which is obtained by extracting the part corresponding
3404    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3405    first row that belongs to the processor, and r2 is the last row belonging
3406    to this processor. This is a square mxm matrix. The remaining portion
3407    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3408 
3409    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3410 
3411    You can call MatGetInfo() to get information on how effective the preallocation was;
3412    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3413    You can also run with the option -info and look for messages with the string
3414    malloc in them to see if additional memory allocation was needed.
3415 
3416    Example usage:
3417 
3418    Consider the following 8x8 matrix with 34 non-zero values, that is
3419    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3420    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3421    as follows:
3422 
3423 .vb
3424             1  2  0  |  0  3  0  |  0  4
3425     Proc0   0  5  6  |  7  0  0  |  8  0
3426             9  0 10  | 11  0  0  | 12  0
3427     -------------------------------------
3428            13  0 14  | 15 16 17  |  0  0
3429     Proc1   0 18  0  | 19 20 21  |  0  0
3430             0  0  0  | 22 23  0  | 24  0
3431     -------------------------------------
3432     Proc2  25 26 27  |  0  0 28  | 29  0
3433            30  0  0  | 31 32 33  |  0 34
3434 .ve
3435 
3436    This can be represented as a collection of submatrices as:
3437 
3438 .vb
3439       A B C
3440       D E F
3441       G H I
3442 .ve
3443 
3444    Where the submatrices A,B,C are owned by proc0, D,E,F are
3445    owned by proc1, G,H,I are owned by proc2.
3446 
3447    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3448    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3449    The 'M','N' parameters are 8,8, and have the same values on all procs.
3450 
3451    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3452    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3453    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3454    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3455    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3456    matrix, and [DF] as another SeqAIJ matrix.
3457 
3458    When d_nz, o_nz parameters are specified, d_nz storage elements are
3459    allocated for every row of the local diagonal submatrix, and o_nz
3460    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3461    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3462    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3463    In this case, the values of d_nz,o_nz are:
3464 .vb
3465      proc0 : dnz = 2, o_nz = 2
3466      proc1 : dnz = 3, o_nz = 2
3467      proc2 : dnz = 1, o_nz = 4
3468 .ve
3469    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3470    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3471    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3472    34 values.
3473 
3474    When d_nnz, o_nnz parameters are specified, the storage is specified
3475    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3476    In the above case the values for d_nnz,o_nnz are:
3477 .vb
3478      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3479      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3480      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3481 .ve
3482    Here the space allocated is the sum of all the above values, i.e. 34, and
3483    hence the preallocation is perfect.
3484 
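   As a rough sketch (not a complete program), the corresponding call on proc1 of the
   example above would be, assuming the matrix A has already been created with MatCreate(),
   MatSetSizes() and MatSetType(A,MATMPIAIJ):

.vb
      PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};   /* proc1 values from the table above */
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
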
3485    Level: intermediate
3486 
3487 .keywords: matrix, aij, compressed row, sparse, parallel
3488 
3489 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3490           MPIAIJ, MatGetInfo()
3491 @*/
3492 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3493 {
3494   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3495 
3496   PetscFunctionBegin;
3497   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3498   if (f) {
3499     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3500   }
3501   PetscFunctionReturn(0);
3502 }
3503 
3504 #undef __FUNCT__
3505 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3506 /*@
3507      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3508          rows in standard CSR format.
3509 
3510    Collective on MPI_Comm
3511 
3512    Input Parameters:
3513 +  comm - MPI communicator
3514 .  m - number of local rows (Cannot be PETSC_DECIDE)
3515 .  n - This value should be the same as the local size used in creating the
3516        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3517        calculated if N is given). For square matrices n is almost always m.
3518 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3519 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3520 .   i - row indices
3521 .   j - column indices
3522 -   a - matrix values
3523 
3524    Output Parameter:
3525 .   mat - the matrix
3526 
3527    Level: intermediate
3528 
3529    Notes:
3530        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3531      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3532      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3533 
3534        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3535 
3536        The format used for the sparse matrix input is equivalent to a
3537     row-major ordering, i.e. for the following matrix, the input data expected is
3538     as shown:
3539 
3540         1 0 0
3541         2 0 3     P0
3542        -------
3543         4 5 6     P1
3544 
3545      Process0 [P0]: rows_owned=[0,1]
3546         i =  {0,1,3}  [size = nrow+1  = 2+1]
3547         j =  {0,0,2}  [size = nz = 3]
3548         v =  {1,2,3}  [size = nz = 3]
3549 
3550      Process1 [P1]: rows_owned=[2]
3551         i =  {0,3}    [size = nrow+1  = 1+1]
3552         j =  {0,1,2}  [size = nz = 3]
3553         v =  {4,5,6}  [size = nz = 3]
3554 
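   As a minimal sketch, process P1 above (one local row of the 3x3 global matrix) could
   create its part of the matrix as follows (comm is assumed to be the shared communicator):

.vb
      PetscInt    i[2] = {0,3}, j[3] = {0,1,2};
      PetscScalar v[3] = {4,5,6};
      Mat         A;
      MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
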
3555 .keywords: matrix, aij, compressed row, sparse, parallel
3556 
3557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3558           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3559 @*/
3560 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3561 {
3562   PetscErrorCode ierr;
3563 
3564   PetscFunctionBegin;
3565   if (i[0]) {
3566     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3567   }
3568   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3569   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3570   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3571   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3572   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3573   PetscFunctionReturn(0);
3574 }
3575 
3576 #undef __FUNCT__
3577 #define __FUNCT__ "MatCreateMPIAIJ"
3578 /*@C
3579    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3580    (the default parallel PETSc format).  For good matrix assembly performance
3581    the user should preallocate the matrix storage by setting the parameters
3582    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3583    performance can be increased by more than a factor of 50.
3584 
3585    Collective on MPI_Comm
3586 
3587    Input Parameters:
3588 +  comm - MPI communicator
3589 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3590            This value should be the same as the local size used in creating the
3591            y vector for the matrix-vector product y = Ax.
3592 .  n - This value should be the same as the local size used in creating the
3593        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3594        calculated if N is given). For square matrices n is almost always m.
3595 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3596 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3597 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3598            (same value is used for all local rows)
3599 .  d_nnz - array containing the number of nonzeros in the various rows of the
3600            DIAGONAL portion of the local submatrix (possibly different for each row)
3601            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3602            The size of this array is equal to the number of local rows, i.e 'm'.
3603            You must leave room for the diagonal entry even if it is zero.
3604 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3605            submatrix (same value is used for all local rows).
3606 -  o_nnz - array containing the number of nonzeros in the various rows of the
3607            OFF-DIAGONAL portion of the local submatrix (possibly different for
3608            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3609            structure. The size of this array is equal to the number
3610            of local rows, i.e 'm'.
3611 
3612    Output Parameter:
3613 .  A - the matrix
3614 
3615    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3616    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3617    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3618 
3619    Notes:
3620    If the *_nnz parameter is given then the *_nz parameter is ignored
3621 
3622    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3623    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3624    storage requirements for this matrix.
3625 
3626    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3627    processor then it must be used on all processors that share the object for
3628    that argument.
3629 
3630    The user MUST specify either the local or global matrix dimensions
3631    (possibly both).
3632 
3633    The parallel matrix is partitioned across processors such that the
3634    first m0 rows belong to process 0, the next m1 rows belong to
3635    process 1, the next m2 rows belong to process 2 etc., where
3636    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3637    values corresponding to an [m x N] submatrix.
3638 
3639    The columns are logically partitioned with the n0 columns belonging
3640    to 0th partition, the next n1 columns belonging to the next
3641    partition etc., where n0,n1,n2... are the input parameter 'n'.
3642 
3643    The DIAGONAL portion of the local submatrix on any given processor
3644    is the submatrix corresponding to the rows and columns m,n
3645    corresponding to the given processor, i.e. the diagonal matrix on
3646    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3647    etc. The remaining portion of the local submatrix [m x (N-n)]
3648    constitute the OFF-DIAGONAL portion. The example below better
3649    illustrates this concept.
3650 
3651    For a square global matrix we define each processor's diagonal portion
3652    to be its local rows and the corresponding columns (a square submatrix);
3653    each processor's off-diagonal portion encompasses the remainder of the
3654    local matrix (a rectangular submatrix).
3655 
3656    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3657 
3658    When calling this routine with a single process communicator, a matrix of
3659    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3660    type of communicator, use the construction mechanism:
3661      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3662 
3663    By default, this format uses inodes (identical nodes) when possible.
3664    We search for consecutive rows with the same nonzero structure, thereby
3665    reusing matrix information to achieve increased efficiency.
3666 
3667    Options Database Keys:
3668 +  -mat_no_inode  - Do not use inodes
3669 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3670 -  -mat_aij_oneindex - Internally use indexing starting at 1
3671         rather than 0.  Note that when calling MatSetValues(),
3672         the user still MUST index entries starting at 0!
3673 
3674 
3675    Example usage:
3676 
3677    Consider the following 8x8 matrix with 34 non-zero values, that is
3678    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3679    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3680    as follows:
3681 
3682 .vb
3683             1  2  0  |  0  3  0  |  0  4
3684     Proc0   0  5  6  |  7  0  0  |  8  0
3685             9  0 10  | 11  0  0  | 12  0
3686     -------------------------------------
3687            13  0 14  | 15 16 17  |  0  0
3688     Proc1   0 18  0  | 19 20 21  |  0  0
3689             0  0  0  | 22 23  0  | 24  0
3690     -------------------------------------
3691     Proc2  25 26 27  |  0  0 28  | 29  0
3692            30  0  0  | 31 32 33  |  0 34
3693 .ve
3694 
3695    This can be represented as a collection of submatrices as:
3696 
3697 .vb
3698       A B C
3699       D E F
3700       G H I
3701 .ve
3702 
3703    Where the submatrices A,B,C are owned by proc0, D,E,F are
3704    owned by proc1, G,H,I are owned by proc2.
3705 
3706    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3707    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3708    The 'M','N' parameters are 8,8, and have the same values on all procs.
3709 
3710    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3711    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3712    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3713    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3714    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3715    matrix, and [DF] as another SeqAIJ matrix.
3716 
3717    When d_nz, o_nz parameters are specified, d_nz storage elements are
3718    allocated for every row of the local diagonal submatrix, and o_nz
3719    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3720    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3721    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3722    In this case, the values of d_nz,o_nz are:
3723 .vb
3724      proc0 : dnz = 2, o_nz = 2
3725      proc1 : dnz = 3, o_nz = 2
3726      proc2 : dnz = 1, o_nz = 4
3727 .ve
3728    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3729    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3730    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3731    34 values.
3732 
3733    When d_nnz, o_nnz parameters are specified, the storage is specified
3734    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3735    In the above case the values for d_nnz,o_nnz are:
3736 .vb
3737      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3738      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3739      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3740 .ve
3741    Here the space allocated is the sum of all the above values, i.e. 34, and
3742    hence the preallocation is perfect.
3743 
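   As a rough sketch (not a complete program), the corresponding call on proc1 of the
   example above (3 local rows and 3 local columns of the 8x8 matrix) would be:

.vb
      PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
      Mat      A;
      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
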
3744    Level: intermediate
3745 
3746 .keywords: matrix, aij, compressed row, sparse, parallel
3747 
3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3749           MPIAIJ, MatCreateMPIAIJWithArrays()
3750 @*/
3751 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3752 {
3753   PetscErrorCode ierr;
3754   PetscMPIInt    size;
3755 
3756   PetscFunctionBegin;
3757   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3758   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3759   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3760   if (size > 1) {
3761     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3762     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3763   } else {
3764     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3765     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3766   }
3767   PetscFunctionReturn(0);
3768 }
3769 
3770 #undef __FUNCT__
3771 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3772 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3773 {
3774   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3775 
3776   PetscFunctionBegin;
3777   *Ad     = a->A;
3778   *Ao     = a->B;
3779   *colmap = a->garray;
3780   PetscFunctionReturn(0);
3781 }
3782 
3783 #undef __FUNCT__
3784 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3785 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3786 {
3787   PetscErrorCode ierr;
3788   PetscInt       i;
3789   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3790 
3791   PetscFunctionBegin;
3792   if (coloring->ctype == IS_COLORING_GLOBAL) {
3793     ISColoringValue *allcolors,*colors;
3794     ISColoring      ocoloring;
3795 
3796     /* set coloring for diagonal portion */
3797     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3798 
3799     /* set coloring for off-diagonal portion */
3800     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3801     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3802     for (i=0; i<a->B->cmap->n; i++) {
3803       colors[i] = allcolors[a->garray[i]];
3804     }
3805     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3806     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3807     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3808     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3809   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3810     ISColoringValue *colors;
3811     PetscInt        *larray;
3812     ISColoring      ocoloring;
3813 
3814     /* set coloring for diagonal portion */
3815     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3816     for (i=0; i<a->A->cmap->n; i++) {
3817       larray[i] = i + A->cmap->rstart;
3818     }
3819     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3820     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3821     for (i=0; i<a->A->cmap->n; i++) {
3822       colors[i] = coloring->colors[larray[i]];
3823     }
3824     ierr = PetscFree(larray);CHKERRQ(ierr);
3825     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3826     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3827     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3828 
3829     /* set coloring for off-diagonal portion */
3830     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3831     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3832     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3833     for (i=0; i<a->B->cmap->n; i++) {
3834       colors[i] = coloring->colors[larray[i]];
3835     }
3836     ierr = PetscFree(larray);CHKERRQ(ierr);
3837     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3838     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3839     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3840   } else {
3841     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3842   }
3843 
3844   PetscFunctionReturn(0);
3845 }
3846 
3847 #if defined(PETSC_HAVE_ADIC)
3848 #undef __FUNCT__
3849 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3850 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3851 {
3852   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3853   PetscErrorCode ierr;
3854 
3855   PetscFunctionBegin;
3856   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3857   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3858   PetscFunctionReturn(0);
3859 }
3860 #endif
3861 
3862 #undef __FUNCT__
3863 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3864 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3865 {
3866   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3867   PetscErrorCode ierr;
3868 
3869   PetscFunctionBegin;
3870   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3871   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3872   PetscFunctionReturn(0);
3873 }
3874 
3875 #undef __FUNCT__
3876 #define __FUNCT__ "MatMerge"
3877 /*@
3878       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3879                  matrices from each processor
3880 
3881     Collective on MPI_Comm
3882 
3883    Input Parameters:
3884 +    comm - the communicator the parallel matrix will live on
3885 .    inmat - the input sequential matrices
3886 .    n - number of local columns (or PETSC_DECIDE)
3887 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3888 
3889    Output Parameter:
3890 .    outmat - the parallel matrix generated
3891 
3892     Level: advanced
3893 
3894    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3895 
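   A minimal usage sketch (inmat is this process' sequential matrix; note that inmat is
   destroyed by this routine):

.vb
      Mat C;
      MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
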
3896 @*/
3897 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3898 {
3899   PetscErrorCode ierr;
3900   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3901   PetscInt       *indx;
3902   PetscScalar    *values;
3903 
3904   PetscFunctionBegin;
3905   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3906   if (scall == MAT_INITIAL_MATRIX){
3907     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3908     if (n == PETSC_DECIDE){
3909       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3910     }
3911     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3912     rstart -= m;
3913 
3914     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3915     for (i=0;i<m;i++) {
3916       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3917       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3918       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3919     }
3920     /* This routine will ONLY return MPIAIJ type matrix */
3921     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3922     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3923     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3924     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3925     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3926 
3927   } else if (scall == MAT_REUSE_MATRIX){
3928     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3929   } else {
3930     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3931   }
3932 
3933   for (i=0;i<m;i++) {
3934     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3935     Ii    = i + rstart;
3936     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3937     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3938   }
3939   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3940   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3941   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3942 
3943   PetscFunctionReturn(0);
3944 }
3945 
3946 #undef __FUNCT__
3947 #define __FUNCT__ "MatFileSplit"
3948 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3949 {
3950   PetscErrorCode    ierr;
3951   PetscMPIInt       rank;
3952   PetscInt          m,N,i,rstart,nnz;
3953   size_t            len;
3954   const PetscInt    *indx;
3955   PetscViewer       out;
3956   char              *name;
3957   Mat               B;
3958   const PetscScalar *values;
3959 
3960   PetscFunctionBegin;
3961   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3962   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3963   /* Should this be the type of the diagonal block of A? */
3964   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3965   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3966   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3967   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3968   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3969   for (i=0;i<m;i++) {
3970     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3971     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3972     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3973   }
3974   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3976 
3977   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3978   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3979   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3980   sprintf(name,"%s.%d",outfile,rank);
3981   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3982   ierr = PetscFree(name);CHKERRQ(ierr);
3983   ierr = MatView(B,out);CHKERRQ(ierr);
3984   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3985   ierr = MatDestroy(B);CHKERRQ(ierr);
3986   PetscFunctionReturn(0);
3987 }
3988 
3989 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3990 #undef __FUNCT__
3991 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3992 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3993 {
3994   PetscErrorCode       ierr;
3995   Mat_Merge_SeqsToMPI  *merge;
3996   PetscContainer       container;
3997 
3998   PetscFunctionBegin;
3999   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4000   if (container) {
4001     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4006     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4007     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4008     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4009     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4010     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4011     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4012     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4013 
4014     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4015     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4016     ierr = PetscFree(merge);CHKERRQ(ierr);
4017   }
4018 
4019   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4020   PetscFunctionReturn(0);
4021 }
4022 
4023 #include "../src/mat/utils/freespace.h"
4024 #include "petscbt.h"
4025 
4026 #undef __FUNCT__
4027 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4028 /*@C
4029       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4030                  matrices from each processor
4031 
4032     Collective on MPI_Comm
4033 
4034    Input Parameters:
4035 +    comm - the communicator the parallel matrix will live on
4036 .    seqmat - the input sequential matrices
4037 .    m - number of local rows (or PETSC_DECIDE)
4038 .    n - number of local columns (or PETSC_DECIDE)
4039 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4040 
4041    Output Parameter:
4042 .    mpimat - the parallel matrix generated
4043 
4044     Level: advanced
4045 
4046    Notes:
4047      The dimensions of the sequential matrix MUST be the same on every process.
4048      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4049      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4050 @*/
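/*
   A minimal usage sketch (not part of the original source). It assumes every process holds an
   assembled MATSEQAIJ matrix "seqmat" of identical global dimensions on the communicator "comm",
   and that "m" and "n" are the desired local sizes of the parallel result (PETSC_DECIDE is also
   accepted, per the documentation above). The driver MatMerge_SeqsToMPI() defined later in this
   file performs the symbolic phase on the first call and only the numeric phase on reuse:

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... change the numerical values of seqmat while keeping its nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/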
4051 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4052 {
4053   PetscErrorCode       ierr;
4054   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4055   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4056   PetscMPIInt          size,rank,taga,*len_s;
4057   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4058   PetscInt             proc,m;
4059   PetscInt             **buf_ri,**buf_rj;
4060   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4061   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4062   MPI_Request          *s_waits,*r_waits;
4063   MPI_Status           *status;
4064   MatScalar            *aa=a->a;
4065   MatScalar            **abuf_r,*ba_i;
4066   Mat_Merge_SeqsToMPI  *merge;
4067   PetscContainer       container;
4068 
4069   PetscFunctionBegin;
4070   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4071 
4072   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4073   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4074 
4075   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4076   if (container) {
4077     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4078   }
4079   bi     = merge->bi;
4080   bj     = merge->bj;
4081   buf_ri = merge->buf_ri;
4082   buf_rj = merge->buf_rj;
4083 
4084   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4085   owners = merge->rowmap->range;
4086   len_s  = merge->len_s;
4087 
4088   /* send and recv matrix values */
4089   /*-----------------------------*/
4090   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4091   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4092 
4093   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4094   for (proc=0,k=0; proc<size; proc++){
4095     if (!len_s[proc]) continue;
4096     i = owners[proc];
4097     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4098     k++;
4099   }
4100 
4101   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4102   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4103   ierr = PetscFree(status);CHKERRQ(ierr);
4104 
4105   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4106   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4107 
4108   /* insert mat values of mpimat */
4109   /*----------------------------*/
4110   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4111   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4112   nextrow = buf_ri_k + merge->nrecv;
4113   nextai  = nextrow + merge->nrecv;
4114 
4115   for (k=0; k<merge->nrecv; k++){
4116     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4117     nrows = *(buf_ri_k[k]);
4118     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4119     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure part of the k-th received message */
4120   }
4121 
4122   /* set values of ba */
4123   m = merge->rowmap->n;
4124   for (i=0; i<m; i++) {
4125     arow = owners[rank] + i;
4126     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4127     bnzi = bi[i+1] - bi[i];
4128     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4129 
4130     /* add local non-zero vals of this proc's seqmat into ba */
4131     anzi = ai[arow+1] - ai[arow];
4132     aj   = a->j + ai[arow];
4133     aa   = a->a + ai[arow];
4134     nextaj = 0;
4135     for (j=0; nextaj<anzi; j++){
4136       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4137         ba_i[j] += aa[nextaj++];
4138       }
4139     }
4140 
4141     /* add received vals into ba */
4142     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4143       /* i-th row */
4144       if (i == *nextrow[k]) {
4145         anzi = *(nextai[k]+1) - *nextai[k];
4146         aj   = buf_rj[k] + *(nextai[k]);
4147         aa   = abuf_r[k] + *(nextai[k]);
4148         nextaj = 0;
4149         for (j=0; nextaj<anzi; j++){
4150           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4151             ba_i[j] += aa[nextaj++];
4152           }
4153         }
4154         nextrow[k]++; nextai[k]++;
4155       }
4156     }
4157     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4158   }
4159   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4160   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4161 
4162   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4163   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4164   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4165   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4166   PetscFunctionReturn(0);
4167 }
4168 
4169 #undef __FUNCT__
4170 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4171 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4172 {
4173   PetscErrorCode       ierr;
4174   Mat                  B_mpi;
4175   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4176   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4177   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4178   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4179   PetscInt             len,proc,*dnz,*onz;
4180   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4181   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4182   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4183   MPI_Status           *status;
4184   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4185   PetscBT              lnkbt;
4186   Mat_Merge_SeqsToMPI  *merge;
4187   PetscContainer       container;
4188 
4189   PetscFunctionBegin;
4190   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4191 
4192   /* make sure it is a PETSc comm */
4193   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4195   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4196 
4197   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4198   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4199 
4200   /* determine row ownership */
4201   /*---------------------------------------------------------*/
4202   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4203   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4204   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4205   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4206   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4207   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4208   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4209 
4210   m      = merge->rowmap->n;
4211   M      = merge->rowmap->N;
4212   owners = merge->rowmap->range;
4213 
4214   /* determine the number of messages to send, their lengths */
4215   /*---------------------------------------------------------*/
4216   len_s  = merge->len_s;
4217 
4218   len = 0;  /* length of buf_si[] */
4219   merge->nsend = 0;
4220   for (proc=0; proc<size; proc++){
4221     len_si[proc] = 0;
4222     if (proc == rank){
4223       len_s[proc] = 0;
4224     } else {
4225       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4226       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4227     }
4228     if (len_s[proc]) {
4229       merge->nsend++;
4230       nrows = 0;
4231       for (i=owners[proc]; i<owners[proc+1]; i++){
4232         if (ai[i+1] > ai[i]) nrows++;
4233       }
4234       len_si[proc] = 2*(nrows+1);
4235       len += len_si[proc];
4236     }
4237   }
4238 
4239   /* determine the number and length of messages to receive for ij-structure */
4240   /*-------------------------------------------------------------------------*/
4241   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4242   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4243 
4244   /* post the Irecv of j-structure */
4245   /*-------------------------------*/
4246   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4247   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4248 
4249   /* post the Isend of j-structure */
4250   /*--------------------------------*/
4251   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4252   sj_waits = si_waits + merge->nsend;
4253 
4254   for (proc=0, k=0; proc<size; proc++){
4255     if (!len_s[proc]) continue;
4256     i = owners[proc];
4257     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4258     k++;
4259   }
4260 
4261   /* receives and sends of j-structure are complete */
4262   /*------------------------------------------------*/
4263   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4264   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4265 
4266   /* send and recv i-structure */
4267   /*---------------------------*/
4268   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4269   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4270 
4271   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4272   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4273   for (proc=0,k=0; proc<size; proc++){
4274     if (!len_s[proc]) continue;
4275     /* form outgoing message for i-structure:
4276          buf_si[0]:                 nrows to be sent
4277                [1:nrows]:           row index (global)
4278                [nrows+1:2*nrows+1]: i-structure index
4279     */
4280     /*-------------------------------------------*/
4281     nrows = len_si[proc]/2 - 1;
4282     buf_si_i    = buf_si + nrows+1;
4283     buf_si[0]   = nrows;
4284     buf_si_i[0] = 0;
4285     nrows = 0;
4286     for (i=owners[proc]; i<owners[proc+1]; i++){
4287       anzi = ai[i+1] - ai[i];
4288       if (anzi) {
4289         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4290         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4291         nrows++;
4292       }
4293     }
4294     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4295     k++;
4296     buf_si += len_si[proc];
4297   }
4298 
4299   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4300   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4301 
4302   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4303   for (i=0; i<merge->nrecv; i++){
4304     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4305   }
4306 
4307   ierr = PetscFree(len_si);CHKERRQ(ierr);
4308   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4309   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4310   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4311   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4312   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4313   ierr = PetscFree(status);CHKERRQ(ierr);
4314 
4315   /* compute a local seq matrix in each processor */
4316   /*----------------------------------------------*/
4317   /* allocate bi array and free space for accumulating nonzero column info */
4318   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4319   bi[0] = 0;
4320 
4321   /* create and initialize a linked list */
4322   nlnk = N+1;
4323   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4324 
4325   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4326   len = 0;
4327   len  = ai[owners[rank+1]] - ai[owners[rank]];
4328   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4329   current_space = free_space;
4330 
4331   /* determine symbolic info for each local row */
4332   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4333   nextrow = buf_ri_k + merge->nrecv;
4334   nextai  = nextrow + merge->nrecv;
4335   for (k=0; k<merge->nrecv; k++){
4336     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4337     nrows = *buf_ri_k[k];
4338     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4339     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure part of the k-th received message */
4340   }
4341 
4342   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4343   len = 0;
4344   for (i=0;i<m;i++) {
4345     bnzi   = 0;
4346     /* add local non-zero cols of this proc's seqmat into lnk */
4347     arow   = owners[rank] + i;
4348     anzi   = ai[arow+1] - ai[arow];
4349     aj     = a->j + ai[arow];
4350     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4351     bnzi += nlnk;
4352     /* add received col data into lnk */
4353     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4354       if (i == *nextrow[k]) { /* i-th row */
4355         anzi = *(nextai[k]+1) - *nextai[k];
4356         aj   = buf_rj[k] + *nextai[k];
4357         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4358         bnzi += nlnk;
4359         nextrow[k]++; nextai[k]++;
4360       }
4361     }
4362     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4363 
4364     /* if free space is not available, make more free space */
4365     if (current_space->local_remaining<bnzi) {
4366       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4367       nspacedouble++;
4368     }
4369     /* copy data into free space, then initialize lnk */
4370     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4371     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4372 
4373     current_space->array           += bnzi;
4374     current_space->local_used      += bnzi;
4375     current_space->local_remaining -= bnzi;
4376 
4377     bi[i+1] = bi[i] + bnzi;
4378   }
4379 
4380   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4381 
4382   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4383   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4384   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4385 
4386   /* create symbolic parallel matrix B_mpi */
4387   /*---------------------------------------*/
4388   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4389   if (n==PETSC_DECIDE) {
4390     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4391   } else {
4392     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4393   }
4394   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4395   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4396   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4397 
4398   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4399   B_mpi->assembled     = PETSC_FALSE;
4400   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4401   merge->bi            = bi;
4402   merge->bj            = bj;
4403   merge->buf_ri        = buf_ri;
4404   merge->buf_rj        = buf_rj;
4405   merge->coi           = PETSC_NULL;
4406   merge->coj           = PETSC_NULL;
4407   merge->owners_co     = PETSC_NULL;
4408 
4409   /* attach the supporting struct to B_mpi for reuse */
4410   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4411   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4412   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4413   *mpimat = B_mpi;
4414 
4415   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4416   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4417   PetscFunctionReturn(0);
4418 }
4419 
4420 #undef __FUNCT__
4421 #define __FUNCT__ "MatMerge_SeqsToMPI"
4422 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4423 {
4424   PetscErrorCode   ierr;
4425 
4426   PetscFunctionBegin;
4427   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4428   if (scall == MAT_INITIAL_MATRIX){
4429     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4430   }
4431   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4432   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 #undef __FUNCT__
4437 #define __FUNCT__ "MatGetLocalMat"
4438 /*@
4439      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix
4440 
4441     Not Collective
4442 
4443    Input Parameters:
4444 +    A - the matrix
4445 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4446 
4447    Output Parameter:
4448 .    A_loc - the local sequential matrix generated
4449 
4450     Level: developer
4451 
4452 @*/
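/*
   A minimal usage sketch (not part of the original source); "A" is assumed to be an assembled
   MATMPIAIJ matrix. The MAT_REUSE_MATRIX call only refreshes the numerical values of A_loc:

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... work with the local rows, e.g. MatGetRow(A_loc,...) with local row indices ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/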
4453 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4454 {
4455   PetscErrorCode  ierr;
4456   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4457   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4458   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4459   MatScalar       *aa=a->a,*ba=b->a,*cam;
4460   PetscScalar     *ca;
4461   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4462   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4463 
4464   PetscFunctionBegin;
4465   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4466   if (scall == MAT_INITIAL_MATRIX){
4467     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4468     ci[0] = 0;
4469     for (i=0; i<am; i++){
4470       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4471     }
4472     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4473     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4474     k = 0;
4475     for (i=0; i<am; i++) {
4476       ncols_o = bi[i+1] - bi[i];
4477       ncols_d = ai[i+1] - ai[i];
4478       /* off-diagonal portion of A */
4479       for (jo=0; jo<ncols_o; jo++) {
4480         col = cmap[*bj];
4481         if (col >= cstart) break;
4482         cj[k]   = col; bj++;
4483         ca[k++] = *ba++;
4484       }
4485       /* diagonal portion of A */
4486       for (j=0; j<ncols_d; j++) {
4487         cj[k]   = cstart + *aj++;
4488         ca[k++] = *aa++;
4489       }
4490       /* off-diagonal portion of A */
4491       for (j=jo; j<ncols_o; j++) {
4492         cj[k]   = cmap[*bj++];
4493         ca[k++] = *ba++;
4494       }
4495     }
4496     /* put together the new matrix */
4497     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4498     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4499     /* Since these are PETSc arrays, change flags to free them as necessary. */
4500     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4501     mat->free_a  = PETSC_TRUE;
4502     mat->free_ij = PETSC_TRUE;
4503     mat->nonew   = 0;
4504   } else if (scall == MAT_REUSE_MATRIX){
4505     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4506     ci = mat->i; cj = mat->j; cam = mat->a;
4507     for (i=0; i<am; i++) {
4508       /* off-diagonal portion of A */
4509       ncols_o = bi[i+1] - bi[i];
4510       for (jo=0; jo<ncols_o; jo++) {
4511         col = cmap[*bj];
4512         if (col >= cstart) break;
4513         *cam++ = *ba++; bj++;
4514       }
4515       /* diagonal portion of A */
4516       ncols_d = ai[i+1] - ai[i];
4517       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4518       /* off-diagonal portion of A */
4519       for (j=jo; j<ncols_o; j++) {
4520         *cam++ = *ba++; bj++;
4521       }
4522     }
4523   } else {
4524     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4525   }
4526 
4527   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4528   PetscFunctionReturn(0);
4529 }
4530 
4531 #undef __FUNCT__
4532 #define __FUNCT__ "MatGetLocalMatCondensed"
4533 /*@C
4534      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all of the local rows and the NON-ZERO columns of a parallel matrix
4535 
4536     Not Collective
4537 
4538    Input Parameters:
4539 +    A - the matrix
4540 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4541 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4542 
4543    Output Parameter:
4544 .    A_loc - the local sequential matrix generated
4545 
4546     Level: developer
4547 
4548 @*/
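/*
   A minimal usage sketch (not part of the original source); "A" is assumed to be an assembled
   MATMPIAIJ matrix. Passing PETSC_NULL for row and col lets the routine select all local rows
   and the nonzero columns automatically:

     Mat A_cond;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_cond);CHKERRQ(ierr);
     ... use A_cond ...
     ierr = MatDestroy(A_cond);CHKERRQ(ierr);
*/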
4549 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4550 {
4551   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4552   PetscErrorCode    ierr;
4553   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4554   IS                isrowa,iscola;
4555   Mat               *aloc;
4556 
4557   PetscFunctionBegin;
4558   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4559   if (!row){
4560     start = A->rmap->rstart; end = A->rmap->rend;
4561     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4562   } else {
4563     isrowa = *row;
4564   }
4565   if (!col){
4566     start = A->cmap->rstart;
4567     cmap  = a->garray;
4568     nzA   = a->A->cmap->n;
4569     nzB   = a->B->cmap->n;
4570     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4571     ncols = 0;
4572     for (i=0; i<nzB; i++) {
4573       if (cmap[i] < start) idx[ncols++] = cmap[i];
4574       else break;
4575     }
4576     imark = i;
4577     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4578     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4579     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4580     ierr = PetscFree(idx);CHKERRQ(ierr);
4581   } else {
4582     iscola = *col;
4583   }
4584   if (scall != MAT_INITIAL_MATRIX){
4585     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4586     aloc[0] = *A_loc;
4587   }
4588   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4589   *A_loc = aloc[0];
4590   ierr = PetscFree(aloc);CHKERRQ(ierr);
4591   if (!row){
4592     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4593   }
4594   if (!col){
4595     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4596   }
4597   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4598   PetscFunctionReturn(0);
4599 }
4600 
4601 #undef __FUNCT__
4602 #define __FUNCT__ "MatGetBrowsOfAcols"
4603 /*@C
4604     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4605 
4606     Collective on Mat
4607 
4608    Input Parameters:
4609 +    A,B - the matrices in mpiaij format
4610 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4611 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4612 
4613    Output Parameters:
4614 +    rowb, colb - index sets of rows and columns of B to extract
4615 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq)
4616 -    B_seq - the sequential matrix generated
4617 
4618     Level: developer
4619 
4620 @*/
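/*
   A minimal usage sketch (not part of the original source); "A" and "B" are assumed to be
   MATMPIAIJ matrices whose layouts satisfy the compatibility check below, as in a product A*B.
   The index sets created by the first call are returned through rowb and colb so that a
   MAT_REUSE_MATRIX call only refreshes the values of B_seq:

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... use B_seq ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/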
4621 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4622 {
4623   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4624   PetscErrorCode    ierr;
4625   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4626   IS                isrowb,iscolb;
4627   Mat               *bseq;
4628 
4629   PetscFunctionBegin;
4630   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4631     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4632   }
4633   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4634 
4635   if (scall == MAT_INITIAL_MATRIX){
4636     start = A->cmap->rstart;
4637     cmap  = a->garray;
4638     nzA   = a->A->cmap->n;
4639     nzB   = a->B->cmap->n;
4640     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4641     ncols = 0;
4642     for (i=0; i<nzB; i++) {  /* row < local row index */
4643       if (cmap[i] < start) idx[ncols++] = cmap[i];
4644       else break;
4645     }
4646     imark = i;
4647     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4648     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4649     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4650     ierr = PetscFree(idx);CHKERRQ(ierr);
4651     *brstart = imark;
4652     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4653   } else {
4654     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4655     isrowb = *rowb; iscolb = *colb;
4656     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4657     bseq[0] = *B_seq;
4658   }
4659   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4660   *B_seq = bseq[0];
4661   ierr = PetscFree(bseq);CHKERRQ(ierr);
4662   if (!rowb){
4663     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4664   } else {
4665     *rowb = isrowb;
4666   }
4667   if (!colb){
4668     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4669   } else {
4670     *colb = iscolb;
4671   }
4672   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4673   PetscFunctionReturn(0);
4674 }
4675 
4676 #undef __FUNCT__
4677 #define __FUNCT__ "MatGetBrowsOfAoCols"
4678 /*@C
4679     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4680     of the OFF-DIAGONAL portion of local A
4681 
4682     Collective on Mat
4683 
4684    Input Parameters:
4685 +    A,B - the matrices in mpiaij format
4686 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4687 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4688 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4689 
4690    Output Parameter:
4691 .    B_oth - the sequential matrix generated
4692 
4693     Level: developer
4694 
4695 @*/
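/*
   A minimal usage sketch (not part of the original source); "A" and "B" are assumed to be
   MATMPIAIJ matrices whose layouts satisfy the compatibility check below. startsj and bufa are
   kept between calls so that a MAT_REUSE_MATRIX call only re-communicates the numerical values:

     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... use B_oth ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/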
4696 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4697 {
4698   VecScatter_MPI_General *gen_to,*gen_from;
4699   PetscErrorCode         ierr;
4700   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4701   Mat_SeqAIJ             *b_oth;
4702   VecScatter             ctx=a->Mvctx;
4703   MPI_Comm               comm=((PetscObject)ctx)->comm;
4704   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4705   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4706   PetscScalar            *rvalues,*svalues;
4707   MatScalar              *b_otha,*bufa,*bufA;
4708   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4709   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4710   MPI_Status             *sstatus,rstatus;
4711   PetscMPIInt            jj;
4712   PetscInt               *cols,sbs,rbs;
4713   PetscScalar            *vals;
4714 
4715   PetscFunctionBegin;
4716   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4717     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4718   }
4719   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4720   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4721 
4722   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4723   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4724   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
4725   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
4726   nrecvs   = gen_from->n;
4727   nsends   = gen_to->n;
4728 
4729   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4730   srow     = gen_to->indices;   /* local row index to be sent */
4731   sstarts  = gen_to->starts;
4732   sprocs   = gen_to->procs;
4733   sstatus  = gen_to->sstatus;
4734   sbs      = gen_to->bs;
4735   rstarts  = gen_from->starts;
4736   rprocs   = gen_from->procs;
4737   rbs      = gen_from->bs;
4738 
4739   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4740   if (scall == MAT_INITIAL_MATRIX){
4741     /* i-array */
4742     /*---------*/
4743     /*  post receives */
4744     for (i=0; i<nrecvs; i++){
4745       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4746       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4747       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4748     }
4749 
4750     /* pack the outgoing message */
4751     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4752     rstartsj = sstartsj + nsends +1;
4753     sstartsj[0] = 0;  rstartsj[0] = 0;
4754     len = 0; /* total length of j or a array to be sent */
4755     k = 0;
4756     for (i=0; i<nsends; i++){
4757       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4758       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4759       for (j=0; j<nrows; j++) {
4760         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4761         for (l=0; l<sbs; l++){
4762           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4763           rowlen[j*sbs+l] = ncols;
4764           len += ncols;
4765           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4766         }
4767         k++;
4768       }
4769       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4770       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4771     }
4772     /* recvs and sends of i-array are completed */
4773     i = nrecvs;
4774     while (i--) {
4775       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4776     }
4777     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4778 
4779     /* allocate buffers for sending j and a arrays */
4780     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4781     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4782 
4783     /* create i-array of B_oth */
4784     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4785     b_othi[0] = 0;
4786     len = 0; /* total length of j or a array to be received */
4787     k = 0;
4788     for (i=0; i<nrecvs; i++){
4789       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4790       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4791       for (j=0; j<nrows; j++) {
4792         b_othi[k+1] = b_othi[k] + rowlen[j];
4793         len += rowlen[j]; k++;
4794       }
4795       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4796     }
4797 
4798     /* allocate space for j and a arrays of B_oth */
4799     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4800     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4801 
4802     /* j-array */
4803     /*---------*/
4804     /*  post receives of j-array */
4805     for (i=0; i<nrecvs; i++){
4806       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4807       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4808     }
4809 
4810     /* pack the outgoing message j-array */
4811     k = 0;
4812     for (i=0; i<nsends; i++){
4813       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4814       bufJ = bufj+sstartsj[i];
4815       for (j=0; j<nrows; j++) {
4816         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4817         for (ll=0; ll<sbs; ll++){
4818           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4819           for (l=0; l<ncols; l++){
4820             *bufJ++ = cols[l];
4821           }
4822           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4823         }
4824       }
4825       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4826     }
4827 
4828     /* recvs and sends of j-array are completed */
4829     i = nrecvs;
4830     while (i--) {
4831       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4832     }
4833     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4834   } else if (scall == MAT_REUSE_MATRIX){
4835     sstartsj = *startsj;
4836     rstartsj = sstartsj + nsends +1;
4837     bufa     = *bufa_ptr;
4838     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4839     b_otha   = b_oth->a;
4840   } else {
4841     SETERRQ(PETSC_ERR_ARG_WRONG, "Invalid MatReuse, must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
4842   }
4843 
4844   /* a-array */
4845   /*---------*/
4846   /*  post receives of a-array */
4847   for (i=0; i<nrecvs; i++){
4848     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4849     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4850   }
4851 
4852   /* pack the outgoing message a-array */
4853   k = 0;
4854   for (i=0; i<nsends; i++){
4855     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4856     bufA = bufa+sstartsj[i];
4857     for (j=0; j<nrows; j++) {
4858       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4859       for (ll=0; ll<sbs; ll++){
4860         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4861         for (l=0; l<ncols; l++){
4862           *bufA++ = vals[l];
4863         }
4864         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4865       }
4866     }
4867     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4868   }
4869   /* recvs and sends of a-array are completed */
4870   i = nrecvs;
4871   while (i--) {
4872     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4873   }
4874   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4875   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4876 
4877   if (scall == MAT_INITIAL_MATRIX){
4878     /* put together the new matrix */
4879     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4880 
4881     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4882     /* Since these are PETSc arrays, change flags to free them as necessary. */
4883     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4884     b_oth->free_a  = PETSC_TRUE;
4885     b_oth->free_ij = PETSC_TRUE;
4886     b_oth->nonew   = 0;
4887 
4888     ierr = PetscFree(bufj);CHKERRQ(ierr);
4889     if (!startsj || !bufa_ptr){
4890       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4891       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4892     } else {
4893       *startsj  = sstartsj;
4894       *bufa_ptr = bufa;
4895     }
4896   }
4897   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4898   PetscFunctionReturn(0);
4899 }
4900 
4901 #undef __FUNCT__
4902 #define __FUNCT__ "MatGetCommunicationStructs"
4903 /*@C
4904   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4905 
4906   Not Collective
4907 
4908   Input Parameter:
4909 . A - The matrix in mpiaij format
4910 
4911   Output Parameters:
4912 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4913 . colmap - A map from global column index to local index into lvec
4914 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4915 
4916   Level: developer
4917 
4918 @*/
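/*
   A minimal usage sketch (not part of the original source); "A" is assumed to be a MATMPIAIJ
   matrix and PETSC_USE_CTABLE is assumed NOT to be defined, so colmap is a plain integer array.
   The returned objects are still owned by A and must not be destroyed by the caller:

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/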
4919 #if defined (PETSC_USE_CTABLE)
4920 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4921 #else
4922 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4923 #endif
4924 {
4925   Mat_MPIAIJ *a;
4926 
4927   PetscFunctionBegin;
4928   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4929   PetscValidPointer(lvec, 2);
4930   PetscValidPointer(colmap, 3);
4931   PetscValidPointer(multScatter, 4);
4932   a = (Mat_MPIAIJ *) A->data;
4933   if (lvec) *lvec = a->lvec;
4934   if (colmap) *colmap = a->colmap;
4935   if (multScatter) *multScatter = a->Mvctx;
4936   PetscFunctionReturn(0);
4937 }
4938 
4939 EXTERN_C_BEGIN
4940 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4941 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4942 EXTERN_C_END
4943 
4944 #undef __FUNCT__
4945 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4946 /*
4947     Computes (B'*A')' since computing B*A directly is untenable
4948 
4949                n                       p                          p
4950         (              )       (              )         (                  )
4951       m (      A       )  *  n (       B      )   =   m (         C        )
4952         (              )       (              )         (                  )
4953 
4954 */
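/*
   Written out, the approach sketched above is the identity

        C = A*B = (B^T * A^T)^T

   which lets a MatMatMult() implementation for the transposed operand order be reused,
   at the cost of forming and discarding the intermediate transposes below.
*/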
4955 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4956 {
4957   PetscErrorCode     ierr;
4958   Mat                At,Bt,Ct;
4959 
4960   PetscFunctionBegin;
4961   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4962   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4963   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4964   ierr = MatDestroy(At);CHKERRQ(ierr);
4965   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4966   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4967   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4968   PetscFunctionReturn(0);
4969 }
4970 
4971 #undef __FUNCT__
4972 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4973 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4974 {
4975   PetscErrorCode ierr;
4976   PetscInt       m=A->rmap->n,n=B->cmap->n;
4977   Mat            Cmat;
4978 
4979   PetscFunctionBegin;
4980   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4981   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4982   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4983   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4984   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4985   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4986   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4987   *C   = Cmat;
4988   PetscFunctionReturn(0);
4989 }
4990 
4991 /* ----------------------------------------------------------------*/
4992 #undef __FUNCT__
4993 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4994 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4995 {
4996   PetscErrorCode ierr;
4997 
4998   PetscFunctionBegin;
4999   if (scall == MAT_INITIAL_MATRIX){
5000     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5001   }
5002   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5003   PetscFunctionReturn(0);
5004 }
5005 
5006 EXTERN_C_BEGIN
5007 #if defined(PETSC_HAVE_MUMPS)
5008 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5009 #endif
5010 #if defined(PETSC_HAVE_PASTIX)
5011 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5012 #endif
5013 #if defined(PETSC_HAVE_SUPERLU_DIST)
5014 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5015 #endif
5016 #if defined(PETSC_HAVE_SPOOLES)
5017 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5018 #endif
5019 EXTERN_C_END
5020 
5021 /*MC
5022    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5023 
5024    Options Database Keys:
5025 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5026 
5027   Level: beginner
5028 
5029 .seealso: MatCreateMPIAIJ()
5030 M*/
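/*
   A minimal creation sketch (not part of the original source); m, n, M, N are assumed to be
   the caller's local and global sizes, and the preallocation numbers 5 and 2 are only
   illustrative guesses for the diagonal and off-diagonal nonzeros per row:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...

   Using MatSetFromOptions(A) in place of MatSetType() allows the type to be selected at run
   time with -mat_type mpiaij.
*/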
5031 
5032 EXTERN_C_BEGIN
5033 #undef __FUNCT__
5034 #define __FUNCT__ "MatCreate_MPIAIJ"
5035 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5036 {
5037   Mat_MPIAIJ     *b;
5038   PetscErrorCode ierr;
5039   PetscMPIInt    size;
5040 
5041   PetscFunctionBegin;
5042   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5043 
5044   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5045   B->data         = (void*)b;
5046   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5047   B->rmap->bs     = 1;
5048   B->assembled    = PETSC_FALSE;
5049   B->mapping      = 0;
5050 
5051   B->insertmode   = NOT_SET_VALUES;
5052   b->size         = size;
5053   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5054 
5055   /* build cache for off array entries formed */
5056   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5057   b->donotstash  = PETSC_FALSE;
5058   b->colmap      = 0;
5059   b->garray      = 0;
5060   b->roworiented = PETSC_TRUE;
5061 
5062   /* stuff used for matrix vector multiply */
5063   b->lvec      = PETSC_NULL;
5064   b->Mvctx     = PETSC_NULL;
5065 
5066   /* stuff for MatGetRow() */
5067   b->rowindices   = 0;
5068   b->rowvalues    = 0;
5069   b->getrowactive = PETSC_FALSE;
5070 
5071 #if defined(PETSC_HAVE_SPOOLES)
5072   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5073                                      "MatGetFactor_mpiaij_spooles",
5074                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5075 #endif
5076 #if defined(PETSC_HAVE_MUMPS)
5077   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5078                                      "MatGetFactor_mpiaij_mumps",
5079                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5080 #endif
5081 #if defined(PETSC_HAVE_PASTIX)
5082   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5083 					   "MatGetFactor_mpiaij_pastix",
5084 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5085 #endif
5086 #if defined(PETSC_HAVE_SUPERLU_DIST)
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5088                                      "MatGetFactor_mpiaij_superlu_dist",
5089                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5090 #endif
5091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5092                                      "MatStoreValues_MPIAIJ",
5093                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5095                                      "MatRetrieveValues_MPIAIJ",
5096                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5098 				     "MatGetDiagonalBlock_MPIAIJ",
5099                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5101 				     "MatIsTranspose_MPIAIJ",
5102 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5104 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5105 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5107 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5108 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5110 				     "MatDiagonalScaleLocal_MPIAIJ",
5111 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5113                                      "MatConvert_MPIAIJ_MPICSRPERM",
5114                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5116                                      "MatConvert_MPIAIJ_MPICRL",
5117                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5119                                      "MatConvert_MPIAIJ_MPISBAIJ",
5120                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5121   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5122                                      "MatMatMult_MPIDense_MPIAIJ",
5123                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5124   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5125                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5126                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5127   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5128                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5129                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5130   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5131   PetscFunctionReturn(0);
5132 }
5133 EXTERN_C_END
5134 
5135 #undef __FUNCT__
5136 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5137 /*@
5138      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5139          and "off-diagonal" part of the matrix in CSR format.
5140 
5141    Collective on MPI_Comm
5142 
5143    Input Parameters:
5144 +  comm - MPI communicator
5145 .  m - number of local rows (Cannot be PETSC_DECIDE)
5146 .  n - This value should be the same as the local size used in creating the
5147        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5148        calculated if N is given). For square matrices n is almost always m.
5149 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5150 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5151 .   i - row indices for "diagonal" portion of matrix
5152 .   j - column indices
5153 .   a - matrix values
5154 .   oi - row indices for "off-diagonal" portion of matrix
5155 .   oj - column indices
5156 -   oa - matrix values
5157 
5158    Output Parameter:
5159 .   mat - the matrix
5160 
5161    Level: advanced
5162 
5163    Notes:
5164        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5165 
5166        The i and j indices are 0 based
5167 
5168        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5169 
5170        This sets local rows and cannot be used to set off-processor values.
5171 
5172        You cannot later use MatSetValues() to change values in this matrix.
5173 
5174 .keywords: matrix, aij, compressed row, sparse, parallel
5175 
5176 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5177           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5178 @*/
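/*
   A minimal usage sketch (not part of the original source); i, j, a describe the "diagonal"
   block and oi, oj, oa the "off-diagonal" block in 0-based CSR form, all assumed to be
   allocated and filled by the caller. Because the arrays are not copied they must stay valid
   for the lifetime of the matrix:

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A; as noted above, MatSetValues() cannot be used to change it later ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/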
5179 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5180 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5181 {
5182   PetscErrorCode ierr;
5183   Mat_MPIAIJ     *maij;
5184 
5185  PetscFunctionBegin;
5186   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5187   if (i[0]) {
5188     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5189   }
5190   if (oi[0]) {
5191     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5192   }
5193   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5194   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5195   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5196   maij = (Mat_MPIAIJ*) (*mat)->data;
5197   maij->donotstash     = PETSC_TRUE;
5198   (*mat)->preallocated = PETSC_TRUE;
5199 
5200   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5201   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5202   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5203   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5204 
5205   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5206   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5207 
5208   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5209   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5210   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5211   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5212 
5213   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5214   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5215   PetscFunctionReturn(0);
5216 }
5217 
5218 /*
5219     Special version for direct calls from Fortran
5220 */
5221 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5222 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5223 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5224 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5225 #endif
5226 
5227 /* Change these macros so can be used in void function */
5228 #undef CHKERRQ
5229 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5230 #undef SETERRQ2
5231 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5232 #undef SETERRQ
5233 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5234 
5235 EXTERN_C_BEGIN
5236 #undef __FUNCT__
5237 #define __FUNCT__ "matsetvaluesmpiaij_"
5238 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5239 {
5240   Mat             mat = *mmat;
5241   PetscInt        m = *mm, n = *mn;
5242   InsertMode      addv = *maddv;
5243   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5244   PetscScalar     value;
5245   PetscErrorCode  ierr;
5246 
5247   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5248   if (mat->insertmode == NOT_SET_VALUES) {
5249     mat->insertmode = addv;
5250   }
5251 #if defined(PETSC_USE_DEBUG)
5252   else if (mat->insertmode != addv) {
5253     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5254   }
5255 #endif
5256   {
5257   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5258   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5259   PetscTruth      roworiented = aij->roworiented;
5260 
5261   /* Some Variables required in the macro */
5262   Mat             A = aij->A;
5263   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5264   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5265   MatScalar       *aa = a->a;
5266   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5267   Mat             B = aij->B;
5268   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5269   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5270   MatScalar       *ba = b->a;
5271 
5272   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5273   PetscInt        nonew = a->nonew;
5274   MatScalar       *ap1,*ap2;
5275 
5276   PetscFunctionBegin;
5277   for (i=0; i<m; i++) {
5278     if (im[i] < 0) continue;
5279 #if defined(PETSC_USE_DEBUG)
5280     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5281 #endif
5282     if (im[i] >= rstart && im[i] < rend) {
5283       row      = im[i] - rstart;
5284       lastcol1 = -1;
5285       rp1      = aj + ai[row];
5286       ap1      = aa + ai[row];
5287       rmax1    = aimax[row];
5288       nrow1    = ailen[row];
5289       low1     = 0;
5290       high1    = nrow1;
5291       lastcol2 = -1;
5292       rp2      = bj + bi[row];
5293       ap2      = ba + bi[row];
5294       rmax2    = bimax[row];
5295       nrow2    = bilen[row];
5296       low2     = 0;
5297       high2    = nrow2;
5298 
5299       for (j=0; j<n; j++) {
5300         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5301         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5302         if (in[j] >= cstart && in[j] < cend){
5303           col = in[j] - cstart;
5304           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5305         } else if (in[j] < 0) continue;
5306 #if defined(PETSC_USE_DEBUG)
5307         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5308 #endif
5309         else {
5310           if (mat->was_assembled) {
5311             if (!aij->colmap) {
5312               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5313             }
5314 #if defined (PETSC_USE_CTABLE)
5315             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5316 	    col--;
5317 #else
5318             col = aij->colmap[in[j]] - 1;
5319 #endif
5320             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5321               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5322               col =  in[j];
5323               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5324               B = aij->B;
5325               b = (Mat_SeqAIJ*)B->data;
5326               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5327               rp2      = bj + bi[row];
5328               ap2      = ba + bi[row];
5329               rmax2    = bimax[row];
5330               nrow2    = bilen[row];
5331               low2     = 0;
5332               high2    = nrow2;
5333               bm       = aij->B->rmap->n;
5334               ba = b->a;
5335             }
5336           } else col = in[j];
5337           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5338         }
5339       }
5340     } else {
5341       if (!aij->donotstash) {
5342         if (roworiented) {
5343           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5344         } else {
5345           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5346         }
5347       }
5348     }
5349   }}
5350   PetscFunctionReturnVoid();
5351 }
5352 EXTERN_C_END
5353 
5354