xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5c99c7da0196495a68e78ce0e9a4c534dbd4a2a0)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* keep our own row lengths and send the remaining row lengths to the other processes */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the diagonal and off-diagonal nonzero counts */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the diagonal and off-diagonal nonzero counts */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal (A) and off-diagonal (B) parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
179 a slightly higher hash-table cost; without it, it is not scalable (each process
180 holds an integer array of order N) but lookups are fast.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
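/*
   Illustration (a sketch, not library code) of how the colmap built above is used in the
   non-CTABLE case: colmap[gcol] stores local_index+1 so that 0 can mean "global column gcol
   does not appear in the off-diagonal part".  The helper name is hypothetical.
*/
#if 0
static PetscInt ExampleColmapLookup(const PetscInt *colmap,PetscInt gcol)
{
  return colmap[gcol] - 1;   /* >= 0 : local column index in B;  -1 : not present on this process */
}
#endif
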
205 
206 #define CHUNKSIZE   15
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
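/*
   The two macros above implement the same "sorted row insert": a short binary search narrows
   the window, a linear scan locates the column, and if it is absent the tail of the row is
   shifted up to make room.  A stand-alone sketch of that technique on a plain array (assuming
   room for one more entry already exists) is given below for illustration only.
*/
#if 0
static void ExampleSortedInsert(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;

  while (high - low > 5) {                  /* binary search down to a small window */
    t = (low + high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {
    if (cols[i] > col) break;               /* insertion point found */
    if (cols[i] == col) {                   /* column already present: add or overwrite */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {           /* shift the tail of the row up by one */
    cols[ii+1] = cols[ii];
    vals[ii+1] = vals[ii];
  }
  cols[i] = col;
  vals[i] = value;
  (*nrow)++;
}
#endif
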
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
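  /* Note: the value array v appears to be expected in global-column-sorted order for this row:
     off-diagonal entries whose global column lies below the diagonal block first, then the
     diagonal-block entries, then the remaining off-diagonal entries, matching the three
     PetscMemcpy() calls below. */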
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some variables required by the macros below */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
404 
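/*
   Calling-side sketch for the routine above (matrix size and stencil are invented for
   illustration): entries in locally owned rows go straight into the A/B blocks, while entries
   in off-process rows are stashed and communicated during MatAssemblyBegin/End().
*/
#if 0
static PetscErrorCode ExampleSetValues(MPI_Comm comm)
{
  PetscErrorCode ierr;
  Mat            A;
  PetscInt       i,rstart,rend,N = 100,col[3];
  PetscScalar    v[3];

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {              /* global indices; off-process columns land in B */
    PetscInt n = 0;
    if (i > 0)   {col[n] = i-1; v[n] = -1.0; n++;}
    col[n] = i; v[n] = 2.0; n++;
    if (i < N-1) {col[n] = i+1; v[n] = -1.0; n++;}
    ierr = MatSetValues(A,1,&i,n,col,v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
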
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) {
468     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   }
470   mat->insertmode = addv; /* in case this processor had no cache */
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
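/*
   The MPI_BOR reduction above relies on the InsertMode encoding: if every process used the
   same mode the reduced value equals that mode, while a mix of ADD_VALUES and INSERT_VALUES
   produces their bitwise OR.  A minimal stand-alone sketch of the same consistency check
   (the helper name is hypothetical):
*/
#if 0
static PetscErrorCode ExampleCheckInsertMode(MPI_Comm comm,InsertMode mine)
{
  PetscErrorCode ierr;
  InsertMode     combined;

  PetscFunctionBegin;
  ierr = MPI_Allreduce(&mine,&combined,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (combined == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processes inserted while others added");
  PetscFunctionReturn(0);
}
#endif
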
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive values belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any process has disassembled; if so we must
516      also disassemble ourselves, so that we may reassemble. */
517   /*
518      if the nonzero structure of the submatrix B cannot change, then we know that
519      no process disassembled, and we can skip this step
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processes of the number of messages and the maximum length */
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
643   source = lens + nrecvs;
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the received row indices into the local list of rows to zero */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree(lens);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal, we use separate
674      code so that MatSetValues() is not called for each diagonal entry,
675      which would allocate new memory and trigger many mallocs, slowing things down.
676 
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
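/*
   Calling-side sketch for the routine above: row numbers are global, each process passes the
   rows it wants zeroed, and the routine routes them to their owning processes.  The
   boundary-row choice and helper name below are invented for illustration.
*/
#if 0
static PetscErrorCode ExampleZeroBoundaryRows(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rows[2],nrows = 0,rstart,rend,M,N;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  if (rstart == 0) rows[nrows++] = 0;        /* first global row */
  if (rend == M)   rows[nrows++] = M-1;      /* last global row  */
  ierr = MatZeroRows(A,nrows,rows,1.0);CHKERRQ(ierr);   /* zero the rows, put 1.0 on the diagonal */
  PetscFunctionReturn(0);
}
#endif
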
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
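/*
   The multiply above computes yy = A_d*xx_local + B_o*xx_ghost, overlapping the ghost-value
   scatter with the diagonal-block product.  A minimal calling sketch, with the vectors
   obtained from the matrix so that the layouts match (helper name is illustrative):
*/
#if 0
static PetscErrorCode ExampleMult(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);   /* x matches the column layout, y the row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
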
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually */
776     /* added into yy until the VecScatterEnd() below */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* values actually were received in the Begin() but we need to call this nop */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This requires MatGetSubMatrices(). */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   PetscFunctionReturn(0);
922 }
923 
924 #undef __FUNCT__
925 #define __FUNCT__ "MatView_MPIAIJ_Binary"
926 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
927 {
928   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
929   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
930   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
931   PetscErrorCode    ierr;
932   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
933   int               fd;
934   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
935   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
936   PetscScalar       *column_values;
937 
938   PetscFunctionBegin;
939   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
940   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
941   nz   = A->nz + B->nz;
942   if (!rank) {
943     header[0] = MAT_FILE_COOKIE;
944     header[1] = mat->rmap->N;
945     header[2] = mat->cmap->N;
946     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
947     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
948     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
949     /* get largest number of rows any processor has */
950     rlen = mat->rmap->n;
951     range = mat->rmap->range;
952     for (i=1; i<size; i++) {
953       rlen = PetscMax(rlen,range[i+1] - range[i]);
954     }
955   } else {
956     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
957     rlen = mat->rmap->n;
958   }
959 
960   /* load up the local row counts */
961   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
962   for (i=0; i<mat->rmap->n; i++) {
963     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
964   }
965 
966   /* store the row lengths to the file */
967   if (!rank) {
968     MPI_Status status;
969     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
970     for (i=1; i<size; i++) {
971       rlen = range[i+1] - range[i];
972       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
973       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
974     }
975   } else {
976     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
979 
980   /* load up the local column indices */
981   nzmax = nz; /* process 0 needs as much space as the largest process requires */
982   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
983   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
984   cnt  = 0;
985   for (i=0; i<mat->rmap->n; i++) {
986     for (j=B->i[i]; j<B->i[i+1]; j++) {
987       if ( (col = garray[B->j[j]]) > cstart) break;
988       column_indices[cnt++] = col;
989     }
990     for (k=A->i[i]; k<A->i[i+1]; k++) {
991       column_indices[cnt++] = A->j[k] + cstart;
992     }
993     for (; j<B->i[i+1]; j++) {
994       column_indices[cnt++] = garray[B->j[j]];
995     }
996   }
997   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
998 
999   /* store the column indices to the file */
1000   if (!rank) {
1001     MPI_Status status;
1002     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1003     for (i=1; i<size; i++) {
1004       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1005       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1006       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1007       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1008     }
1009   } else {
1010     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1011     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012   }
1013   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1014 
1015   /* load up the local column values */
1016   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1017   cnt  = 0;
1018   for (i=0; i<mat->rmap->n; i++) {
1019     for (j=B->i[i]; j<B->i[i+1]; j++) {
1020       if ( garray[B->j[j]] > cstart) break;
1021       column_values[cnt++] = B->a[j];
1022     }
1023     for (k=A->i[i]; k<A->i[i+1]; k++) {
1024       column_values[cnt++] = A->a[k];
1025     }
1026     for (; j<B->i[i+1]; j++) {
1027       column_values[cnt++] = B->a[j];
1028     }
1029   }
1030   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1031 
1032   /* store the column values to the file */
1033   if (!rank) {
1034     MPI_Status status;
1035     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1036     for (i=1; i<size; i++) {
1037       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1038       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1039       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1040       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1041     }
1042   } else {
1043     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1044     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045   }
1046   ierr = PetscFree(column_values);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
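/*
   The binary layout written above is: a four-entry integer header {MAT_FILE_COOKIE, M, N,
   total nonzeros}, then every row length, then all column indices (global, in row order),
   then all numerical values.  Such a file is normally produced through a binary viewer; a
   sketch (the file name is invented):
*/
#if 0
static PetscErrorCode ExampleSaveBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);                 /* dispatches to MatView_MPIAIJ_Binary() in parallel */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
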
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1052 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1053 {
1054   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1055   PetscErrorCode    ierr;
1056   PetscMPIInt       rank = aij->rank,size = aij->size;
1057   PetscTruth        isdraw,iascii,isbinary;
1058   PetscViewer       sviewer;
1059   PetscViewerFormat format;
1060 
1061   PetscFunctionBegin;
1062   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1065   if (iascii) {
1066     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1067     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1068       MatInfo    info;
1069       PetscTruth inodes;
1070 
1071       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1072       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1073       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1074       if (!inodes) {
1075         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1076 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1077       } else {
1078         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1079 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1080       }
1081       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1082       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1083       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1084       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1085       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1086       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1087       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1088       PetscFunctionReturn(0);
1089     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1090       PetscInt   inodecount,inodelimit,*inodes;
1091       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1092       if (inodes) {
1093         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1094       } else {
1095         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1096       }
1097       PetscFunctionReturn(0);
1098     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1099       PetscFunctionReturn(0);
1100     }
1101   } else if (isbinary) {
1102     if (size == 1) {
1103       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1104       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1105     } else {
1106       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1107     }
1108     PetscFunctionReturn(0);
1109   } else if (isdraw) {
1110     PetscDraw  draw;
1111     PetscTruth isnull;
1112     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1113     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1114   }
1115 
1116   if (size == 1) {
1117     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1118     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1119   } else {
1120     /* assemble the entire matrix onto first processor. */
1121     Mat         A;
1122     Mat_SeqAIJ  *Aloc;
1123     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1124     MatScalar   *a;
1125 
1126     if (mat->rmap->N > 1024) {
1127       PetscTruth flg = PETSC_FALSE;
1128 
1129       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1130       if (!flg) {
1131         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1132       }
1133     }
1134 
1135     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1136     if (!rank) {
1137       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1138     } else {
1139       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1140     }
1141     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1142     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1143     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1144     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1145 
1146     /* copy over the A part */
1147     Aloc = (Mat_SeqAIJ*)aij->A->data;
1148     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1149     row = mat->rmap->rstart;
1150     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1151     for (i=0; i<m; i++) {
1152       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1153       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1154     }
1155     aj = Aloc->j;
1156     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1157 
1158     /* copy over the B part */
1159     Aloc = (Mat_SeqAIJ*)aij->B->data;
1160     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1161     row  = mat->rmap->rstart;
1162     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1163     ct   = cols;
1164     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1165     for (i=0; i<m; i++) {
1166       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1167       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1168     }
1169     ierr = PetscFree(ct);CHKERRQ(ierr);
1170     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1171     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     /*
1173        Every process has to make the viewing call that draws the matrix, since the graphics waits are
1174        synchronized across all processes that share the PetscDraw object
1175     */
1176     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1177     if (!rank) {
1178       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1179       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1180     }
1181     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1182     ierr = MatDestroy(A);CHKERRQ(ierr);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatView_MPIAIJ"
1189 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1190 {
1191   PetscErrorCode ierr;
1192   PetscTruth     iascii,isdraw,issocket,isbinary;
1193 
1194   PetscFunctionBegin;
1195   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1197   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1199   if (iascii || isdraw || isbinary || issocket) {
1200     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1201   } else {
1202     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatRelax_MPIAIJ"
1209 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1210 {
1211   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1212   PetscErrorCode ierr;
1213   Vec            bb1 = 0;
1214   PetscTruth     hasop;
1215 
1216   PetscFunctionBegin;
1217   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1218     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1219   }
1220 
1221   if (flag == SOR_APPLY_UPPER) {
1222     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223     PetscFunctionReturn(0);
1224   }
1225 
1226   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1227     if (flag & SOR_ZERO_INITIAL_GUESS) {
1228       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1229       its--;
1230     }
1231 
1232     while (its--) {
1233       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235 
1236       /* update rhs: bb1 = bb - B*x */
1237       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1238       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1239 
1240       /* local sweep */
1241       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1242     }
1243   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1244     if (flag & SOR_ZERO_INITIAL_GUESS) {
1245       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1246       its--;
1247     }
1248     while (its--) {
1249       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1250       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251 
1252       /* update rhs: bb1 = bb - B*x */
1253       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1254       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1255 
1256       /* local sweep */
1257       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1258     }
1259   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1260     if (flag & SOR_ZERO_INITIAL_GUESS) {
1261       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1262       its--;
1263     }
1264     while (its--) {
1265       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267 
1268       /* update rhs: bb1 = bb - B*x */
1269       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1270       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1271 
1272       /* local sweep */
1273       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1274     }
1275   }  else if (flag & SOR_EISENSTAT) {
1276     Vec         xx1;
1277 
1278     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1279     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1280 
1281     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     if (!mat->diag) {
1284       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1285       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1286     }
1287     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1288     if (hasop) {
1289       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1290     } else {
1291       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1292     }
1293     ierr = VecAYPX(bb1,-1.0,bb);CHKERRQ(ierr);
1294     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1295 
1296     /* local sweep */
1297     ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1298     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1299     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1300   } else {
1301     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1302   }
1303 
1304   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1305   PetscFunctionReturn(0);
1306 }
1307 
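/*
   Illustrative sketch (added commentary, not part of the original source): the routine
   above only supports processor-local relaxation; each process sweeps over its diagonal
   block mat->A and folds the off-diagonal block mat->B into the right-hand side as
   bb1 = bb - B*x_ghost.  A caller would typically reach it through MatRelax() with one
   of the SOR_LOCAL_* flags (the names A, b, x, pc below are assumed to exist):

     ierr = MatRelax(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   or, more commonly, indirectly through the SOR preconditioner:

     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/
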
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatPermute_MPIAIJ"
1310 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1311 {
1312   MPI_Comm       comm,pcomm;
1313   PetscInt       first,local_size,nrows;
1314   const PetscInt *rows;
1315   PetscMPIInt    size;
1316   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1317   PetscErrorCode ierr;
1318 
1319   PetscFunctionBegin;
1320   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1321   /* make a collective version of 'rowp' */
1322   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1323   if (pcomm==comm) {
1324     crowp = rowp;
1325   } else {
1326     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1327     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1328     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1329     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1330   }
1331   /* collect the global row permutation and invert it */
1332   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1333   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1334   if (pcomm!=comm) {
1335     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1336   }
1337   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1338   /* get the local target indices */
1339   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1340   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1341   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1342   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1343   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1345   /* the column permutation is so much easier;
1346      make a local version of 'colp' and invert it */
1347   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1348   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1349   if (size==1) {
1350     lcolp = colp;
1351   } else {
1352     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1353     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1354     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1355   }
1356   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1357   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1358   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1359   if (size>1) {
1360     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1361     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1362   }
1363   /* now we just get the submatrix */
1364   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1365   /* clean up */
1366   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1367   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
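/*
   Illustrative sketch (added commentary): typical use of the permutation path above via
   the public MatPermute() interface.  Both index sets must be marked as permutations.
   The names rowperm, colperm, nlocal and N are assumptions for this sketch: rowperm
   holds this process's portion of the global row permutation, while colperm is a
   sequential IS holding the full column permutation of length N (the size==1 branch in
   the code above):

     IS  rowp,colp;
     Mat B;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocal,rowperm,&rowp);CHKERRQ(ierr);
     ierr = ISSetPermutation(rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_SELF,N,colperm,&colp);CHKERRQ(ierr);
     ierr = ISSetPermutation(colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/
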
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1373 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1374 {
1375   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1376   Mat            A = mat->A,B = mat->B;
1377   PetscErrorCode ierr;
1378   PetscReal      isend[5],irecv[5];
1379 
1380   PetscFunctionBegin;
1381   info->block_size     = 1.0;
1382   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1383   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1384   isend[3] = info->memory;  isend[4] = info->mallocs;
1385   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1386   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1387   isend[3] += info->memory;  isend[4] += info->mallocs;
1388   if (flag == MAT_LOCAL) {
1389     info->nz_used      = isend[0];
1390     info->nz_allocated = isend[1];
1391     info->nz_unneeded  = isend[2];
1392     info->memory       = isend[3];
1393     info->mallocs      = isend[4];
1394   } else if (flag == MAT_GLOBAL_MAX) {
1395     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1396     info->nz_used      = irecv[0];
1397     info->nz_allocated = irecv[1];
1398     info->nz_unneeded  = irecv[2];
1399     info->memory       = irecv[3];
1400     info->mallocs      = irecv[4];
1401   } else if (flag == MAT_GLOBAL_SUM) {
1402     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1403     info->nz_used      = irecv[0];
1404     info->nz_allocated = irecv[1];
1405     info->nz_unneeded  = irecv[2];
1406     info->memory       = irecv[3];
1407     info->mallocs      = irecv[4];
1408   }
1409   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1410   info->fill_ratio_needed = 0;
1411   info->factor_mallocs    = 0;
1412 
1413   PetscFunctionReturn(0);
1414 }
1415 
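/*
   Illustrative sketch (added commentary): querying the assembled information through the
   public interface.  MAT_LOCAL returns per-process numbers, while MAT_GLOBAL_SUM and
   MAT_GLOBAL_MAX reduce them over the matrix communicator as implemented above (A is an
   assumed assembled matrix):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
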
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatSetOption_MPIAIJ"
1418 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1419 {
1420   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1421   PetscErrorCode ierr;
1422 
1423   PetscFunctionBegin;
1424   switch (op) {
1425   case MAT_NEW_NONZERO_LOCATIONS:
1426   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1427   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1428   case MAT_KEEP_NONZERO_PATTERN:
1429   case MAT_NEW_NONZERO_LOCATION_ERR:
1430   case MAT_USE_INODES:
1431   case MAT_IGNORE_ZERO_ENTRIES:
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_ROW_ORIENTED:
1436     a->roworiented = flg;
1437     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1438     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1439     break;
1440   case MAT_NEW_DIAGONALS:
1441     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1442     break;
1443   case MAT_IGNORE_OFF_PROC_ENTRIES:
1444     a->donotstash = PETSC_TRUE;
1445     break;
1446   case MAT_SYMMETRIC:
1447     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1448     break;
1449   case MAT_STRUCTURALLY_SYMMETRIC:
1450   case MAT_HERMITIAN:
1451   case MAT_SYMMETRY_ETERNAL:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   default:
1455     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1456   }
1457   PetscFunctionReturn(0);
1458 }
1459 
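/*
   Illustrative sketch (added commentary): most options are simply forwarded to the
   sequential diagonal (a->A) and off-diagonal (a->B) blocks as shown above, e.g.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   The second call only sets a->donotstash, so entries destined for other processes are
   dropped by MatSetValues() instead of being stashed and communicated at assembly time.
*/
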
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatGetRow_MPIAIJ"
1462 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1463 {
1464   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1465   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1466   PetscErrorCode ierr;
1467   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1468   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1469   PetscInt       *cmap,*idx_p;
1470 
1471   PetscFunctionBegin;
1472   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1473   mat->getrowactive = PETSC_TRUE;
1474 
1475   if (!mat->rowvalues && (idx || v)) {
1476     /*
1477         allocate enough space to hold information from the longest row.
1478     */
1479     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1480     PetscInt     max = 1,tmp;
1481     for (i=0; i<matin->rmap->n; i++) {
1482       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1483       if (max < tmp) { max = tmp; }
1484     }
1485     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1486     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1487   }
1488 
1489   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1490   lrow = row - rstart;
1491 
1492   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1493   if (!v)   {pvA = 0; pvB = 0;}
1494   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1495   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   nztot = nzA + nzB;
1498 
1499   cmap  = mat->garray;
1500   if (v  || idx) {
1501     if (nztot) {
1502       /* Sort by increasing column numbers, assuming A and B already sorted */
1503       PetscInt imark = -1;
1504       if (v) {
1505         *v = v_p = mat->rowvalues;
1506         for (i=0; i<nzB; i++) {
1507           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1508           else break;
1509         }
1510         imark = i;
1511         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1512         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1513       }
1514       if (idx) {
1515         *idx = idx_p = mat->rowindices;
1516         if (imark > -1) {
1517           for (i=0; i<imark; i++) {
1518             idx_p[i] = cmap[cworkB[i]];
1519           }
1520         } else {
1521           for (i=0; i<nzB; i++) {
1522             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1523             else break;
1524           }
1525           imark = i;
1526         }
1527         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1528         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1529       }
1530     } else {
1531       if (idx) *idx = 0;
1532       if (v)   *v   = 0;
1533     }
1534   }
1535   *nz = nztot;
1536   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1537   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1543 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1544 {
1545   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1546 
1547   PetscFunctionBegin;
1548   if (!aij->getrowactive) {
1549     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1550   }
1551   aij->getrowactive = PETSC_FALSE;
1552   PetscFunctionReturn(0);
1553 }
1554 
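/*
   Illustrative sketch (added commentary): MatGetRow()/MatRestoreRow() on an MPIAIJ
   matrix are only valid for locally owned rows and must be properly nested, since the
   merged row is staged in the single mat->rowvalues/mat->rowindices buffer (A is an
   assumed assembled matrix):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... work with cols[0..ncols-1] (global column indices) and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
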
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatNorm_MPIAIJ"
1557 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1558 {
1559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1560   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1561   PetscErrorCode ierr;
1562   PetscInt       i,j,cstart = mat->cmap->rstart;
1563   PetscReal      sum = 0.0;
1564   MatScalar      *v;
1565 
1566   PetscFunctionBegin;
1567   if (aij->size == 1) {
1568     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1569   } else {
1570     if (type == NORM_FROBENIUS) {
1571       v = amat->a;
1572       for (i=0; i<amat->nz; i++) {
1573 #if defined(PETSC_USE_COMPLEX)
1574         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1575 #else
1576         sum += (*v)*(*v); v++;
1577 #endif
1578       }
1579       v = bmat->a;
1580       for (i=0; i<bmat->nz; i++) {
1581 #if defined(PETSC_USE_COMPLEX)
1582         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1583 #else
1584         sum += (*v)*(*v); v++;
1585 #endif
1586       }
1587       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1588       *norm = sqrt(*norm);
1589     } else if (type == NORM_1) { /* max column norm */
1590       PetscReal *tmp,*tmp2;
1591       PetscInt  *jj,*garray = aij->garray;
1592       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1593       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1594       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1595       *norm = 0.0;
1596       v = amat->a; jj = amat->j;
1597       for (j=0; j<amat->nz; j++) {
1598         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1599       }
1600       v = bmat->a; jj = bmat->j;
1601       for (j=0; j<bmat->nz; j++) {
1602         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1603       }
1604       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1605       for (j=0; j<mat->cmap->N; j++) {
1606         if (tmp2[j] > *norm) *norm = tmp2[j];
1607       }
1608       ierr = PetscFree(tmp);CHKERRQ(ierr);
1609       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1610     } else if (type == NORM_INFINITY) { /* max row norm */
1611       PetscReal ntemp = 0.0;
1612       for (j=0; j<aij->A->rmap->n; j++) {
1613         v = amat->a + amat->i[j];
1614         sum = 0.0;
1615         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1616           sum += PetscAbsScalar(*v); v++;
1617         }
1618         v = bmat->a + bmat->i[j];
1619         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1620           sum += PetscAbsScalar(*v); v++;
1621         }
1622         if (sum > ntemp) ntemp = sum;
1623       }
1624       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1625     } else {
1626       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1627     }
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
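/*
   Illustrative sketch (added commentary): only the Frobenius norm, the one norm (max
   column sum) and the infinity norm (max row sum) are implemented above; NORM_2 is
   refused.  Each is a local computation followed by one MPI_Allreduce():

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/
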
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatTranspose_MPIAIJ"
1634 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1635 {
1636   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1637   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1638   PetscErrorCode ierr;
1639   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1640   PetscInt       cstart=A->cmap->rstart,ncol;
1641   Mat            B;
1642   MatScalar      *array;
1643 
1644   PetscFunctionBegin;
1645   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1646 
1647   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1648   ai = Aloc->i; aj = Aloc->j;
1649   bi = Bloc->i; bj = Bloc->j;
1650   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1651     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1652     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1653     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1654     for (i=0; i<ai[ma]; i++){
1655       d_nnz[aj[i]] ++;
1656       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1657     }
1658 
1659     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1660     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1661     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1662     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1663     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1664   } else {
1665     B = *matout;
1666   }
1667 
1668   /* copy over the A part */
1669   array = Aloc->a;
1670   row = A->rmap->rstart;
1671   for (i=0; i<ma; i++) {
1672     ncol = ai[i+1]-ai[i];
1673     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1674     row++; array += ncol; aj += ncol;
1675   }
1676   aj = Aloc->j;
1677   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1678 
1679   /* copy over the B part */
1680   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1681   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1682   array = Bloc->a;
1683   row = A->rmap->rstart;
1684   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1685   cols_tmp = cols;
1686   for (i=0; i<mb; i++) {
1687     ncol = bi[i+1]-bi[i];
1688     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1689     row++; array += ncol; cols_tmp += ncol;
1690   }
1691   ierr = PetscFree(cols);CHKERRQ(ierr);
1692 
1693   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1696     *matout = B;
1697   } else {
1698     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
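/*
   Illustrative sketch (added commentary): the transpose above is built by inserting the
   local rows of A as columns of a new matrix with MatSetValues().  The first call below
   creates a new matrix; the second is the in-place variant, which requires A to be
   square:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/
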
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1705 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1706 {
1707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1708   Mat            a = aij->A,b = aij->B;
1709   PetscErrorCode ierr;
1710   PetscInt       s1,s2,s3;
1711 
1712   PetscFunctionBegin;
1713   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1714   if (rr) {
1715     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1716     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1717     /* Overlap communication with computation. */
1718     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1719   }
1720   if (ll) {
1721     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1722     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1723     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1724   }
1725   /* scale  the diagonal block */
1726   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1727 
1728   if (rr) {
1729     /* Do a scatter end and then right scale the off-diagonal block */
1730     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1731     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1732   }
1733 
1734   PetscFunctionReturn(0);
1735 }
1736 
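/*
   Illustrative sketch (added commentary): A = diag(ll)*A*diag(rr), computed in place.
   The left vector must conform to the row layout and the right vector to the column
   layout; either may be PETSC_NULL.  The right scaling of the off-diagonal block is
   overlapped with the scatter of rr into aij->lvec, as shown above.  The second call
   performs column scaling only (l and r are assumed conforming vectors):

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);
*/
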
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1739 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1740 {
1741   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1742   PetscErrorCode ierr;
1743 
1744   PetscFunctionBegin;
1745   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1746   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1747   PetscFunctionReturn(0);
1748 }
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1751 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1752 {
1753   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1754   PetscErrorCode ierr;
1755 
1756   PetscFunctionBegin;
1757   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatEqual_MPIAIJ"
1763 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1764 {
1765   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1766   Mat            a,b,c,d;
1767   PetscTruth     flg;
1768   PetscErrorCode ierr;
1769 
1770   PetscFunctionBegin;
1771   a = matA->A; b = matA->B;
1772   c = matB->A; d = matB->B;
1773 
1774   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1775   if (flg) {
1776     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1777   }
1778   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatCopy_MPIAIJ"
1784 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1785 {
1786   PetscErrorCode ierr;
1787   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1788   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1789 
1790   PetscFunctionBegin;
1791   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1792   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1793     /* Because of the column compression in the off-processor part of the matrix a->B,
1794        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1795        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
1796        could be provided by first uncompressing the a->B matrices and then copying the
1797        submatrices. */
1798     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1799   } else {
1800     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1801     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1802   }
1803   PetscFunctionReturn(0);
1804 }
1805 
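/*
   Illustrative sketch (added commentary): the fast path above requires both matrices to
   share the copy implementation and the nonzero pattern; anything else falls back to
   MatCopy_Basic().  A typical way to obtain a compatible destination, and later refresh
   it with the current values of A, is

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
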
1806 #undef __FUNCT__
1807 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1808 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1809 {
1810   PetscErrorCode ierr;
1811 
1812   PetscFunctionBegin;
1813   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 #include "petscblaslapack.h"
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatAXPY_MPIAIJ"
1820 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1821 {
1822   PetscErrorCode ierr;
1823   PetscInt       i;
1824   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1825   PetscBLASInt   bnz,one=1;
1826   Mat_SeqAIJ     *x,*y;
1827 
1828   PetscFunctionBegin;
1829   if (str == SAME_NONZERO_PATTERN) {
1830     PetscScalar alpha = a;
1831     x = (Mat_SeqAIJ *)xx->A->data;
1832     y = (Mat_SeqAIJ *)yy->A->data;
1833     bnz = PetscBLASIntCast(x->nz);
1834     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1835     x = (Mat_SeqAIJ *)xx->B->data;
1836     y = (Mat_SeqAIJ *)yy->B->data;
1837     bnz = PetscBLASIntCast(x->nz);
1838     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1839   } else if (str == SUBSET_NONZERO_PATTERN) {
1840     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1841 
1842     x = (Mat_SeqAIJ *)xx->B->data;
1843     y = (Mat_SeqAIJ *)yy->B->data;
1844     if (y->xtoy && y->XtoY != xx->B) {
1845       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1846       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1847     }
1848     if (!y->xtoy) { /* get xtoy */
1849       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1850       y->XtoY = xx->B;
1851       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1852     }
1853     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1854   } else {
1855     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1856   }
1857   PetscFunctionReturn(0);
1858 }
1859 
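/*
   Illustrative sketch (added commentary): Y = Y + a*X.  With SAME_NONZERO_PATTERN the
   update above reduces to a pair of BLAS axpy calls on the stored values of the diagonal
   and off-diagonal blocks; with SUBSET_NONZERO_PATTERN an x-to-y index map is built and
   cached on the off-diagonal block the first time; otherwise MatAXPY_Basic() is used
   (alpha is an assumed PetscScalar):

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
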
1860 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatConjugate_MPIAIJ"
1864 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1865 {
1866 #if defined(PETSC_USE_COMPLEX)
1867   PetscErrorCode ierr;
1868   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1869 
1870   PetscFunctionBegin;
1871   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1872   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1873 #else
1874   PetscFunctionBegin;
1875 #endif
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatRealPart_MPIAIJ"
1881 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1882 {
1883   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1884   PetscErrorCode ierr;
1885 
1886   PetscFunctionBegin;
1887   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1888   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1894 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1895 {
1896   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1897   PetscErrorCode ierr;
1898 
1899   PetscFunctionBegin;
1900   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1901   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 #ifdef PETSC_HAVE_PBGL
1906 
1907 #include <boost/parallel/mpi/bsp_process_group.hpp>
1908 #include <boost/graph/distributed/ilu_default_graph.hpp>
1909 #include <boost/graph/distributed/ilu_0_block.hpp>
1910 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1911 #include <boost/graph/distributed/petsc/interface.hpp>
1912 #include <boost/multi_array.hpp>
1913 #include <boost/parallel/distributed_property_map.hpp>
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1917 /*
1918   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1919 */
1920 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1921 {
1922   namespace petsc = boost::distributed::petsc;
1923 
1924   namespace graph_dist = boost::graph::distributed;
1925   using boost::graph::distributed::ilu_default::process_group_type;
1926   using boost::graph::ilu_permuted;
1927 
1928   PetscTruth      row_identity, col_identity;
1929   PetscContainer  c;
1930   PetscInt        m, n, M, N;
1931   PetscErrorCode  ierr;
1932 
1933   PetscFunctionBegin;
1934   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1935   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1936   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1937   if (!row_identity || !col_identity) {
1938     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1939   }
1940 
1941   process_group_type pg;
1942   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1943   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1944   lgraph_type&   level_graph = *lgraph_p;
1945   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1946 
1947   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1948   ilu_permuted(level_graph);
1949 
1950   /* put together the new matrix */
1951   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1952   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1953   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1954   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1955   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1956   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958 
1959   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1960   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1961   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #undef __FUNCT__
1966 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1967 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1968 {
1969   PetscFunctionBegin;
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatSolve_MPIAIJ"
1975 /*
1976   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1977 */
1978 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1979 {
1980   namespace graph_dist = boost::graph::distributed;
1981 
1982   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1983   lgraph_type*   lgraph_p;
1984   PetscContainer c;
1985   PetscErrorCode ierr;
1986 
1987   PetscFunctionBegin;
1988   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1989   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1990   ierr = VecCopy(b, x);CHKERRQ(ierr);
1991 
1992   PetscScalar* array_x;
1993   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1994   PetscInt sx;
1995   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1996 
1997   PetscScalar* array_b;
1998   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1999   PetscInt sb;
2000   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2001 
2002   lgraph_type&   level_graph = *lgraph_p;
2003   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2004 
2005   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2006   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2007                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2008 
2009   typedef boost::iterator_property_map<array_ref_type::iterator,
2010                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2011   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2012                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2013 
2014   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2015 
2016   PetscFunctionReturn(0);
2017 }
2018 #endif
2019 
2020 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2021   PetscInt       nzlocal,nsends,nrecvs;
2022   PetscMPIInt    *send_rank;
2023   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2024   PetscScalar    *sbuf_a,**rbuf_a;
2025   PetscErrorCode (*MatDestroy)(Mat);
2026 } Mat_Redundant;
2027 
2028 #undef __FUNCT__
2029 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2030 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2031 {
2032   PetscErrorCode       ierr;
2033   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2034   PetscInt             i;
2035 
2036   PetscFunctionBegin;
2037   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2038   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2039   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2040   for (i=0; i<redund->nrecvs; i++){
2041     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2042     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2043   }
2044   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2045   ierr = PetscFree(redund);CHKERRQ(ierr);
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 #undef __FUNCT__
2050 #define __FUNCT__ "MatDestroy_MatRedundant"
2051 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2052 {
2053   PetscErrorCode  ierr;
2054   PetscContainer  container;
2055   Mat_Redundant   *redund=PETSC_NULL;
2056 
2057   PetscFunctionBegin;
2058   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2059   if (container) {
2060     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2061   } else {
2062     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2063   }
2064   A->ops->destroy = redund->MatDestroy;
2065   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2066   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2067   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2073 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2074 {
2075   PetscMPIInt    rank,size;
2076   MPI_Comm       comm=((PetscObject)mat)->comm;
2077   PetscErrorCode ierr;
2078   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2079   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2080   PetscInt       *rowrange=mat->rmap->range;
2081   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2082   Mat            A=aij->A,B=aij->B,C=*matredundant;
2083   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2084   PetscScalar    *sbuf_a;
2085   PetscInt       nzlocal=a->nz+b->nz;
2086   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2087   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2088   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2089   MatScalar      *aworkA,*aworkB;
2090   PetscScalar    *vals;
2091   PetscMPIInt    tag1,tag2,tag3,imdex;
2092   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2093                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2094   MPI_Status     recv_status,*send_status;
2095   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2096   PetscInt       **rbuf_j=PETSC_NULL;
2097   PetscScalar    **rbuf_a=PETSC_NULL;
2098   Mat_Redundant  *redund=PETSC_NULL;
2099   PetscContainer container;
2100 
2101   PetscFunctionBegin;
2102   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2103   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2104 
2105   if (reuse == MAT_REUSE_MATRIX) {
2106     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2107     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2108     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2109     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2110     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2111     if (container) {
2112       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2113     } else {
2114       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2115     }
2116     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2117 
2118     nsends    = redund->nsends;
2119     nrecvs    = redund->nrecvs;
2120     send_rank = redund->send_rank; recv_rank = send_rank + size;
2121     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2122     sbuf_j    = redund->sbuf_j;
2123     sbuf_a    = redund->sbuf_a;
2124     rbuf_j    = redund->rbuf_j;
2125     rbuf_a    = redund->rbuf_a;
2126   }
2127 
2128   if (reuse == MAT_INITIAL_MATRIX){
2129     PetscMPIInt  subrank,subsize;
2130     PetscInt     nleftover,np_subcomm;
2131     /* get the destination processors' id send_rank, nsends and nrecvs */
2132     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2133     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2134     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2135     recv_rank = send_rank + size;
2136     np_subcomm = size/nsubcomm;
2137     nleftover  = size - nsubcomm*np_subcomm;
2138     nsends = 0; nrecvs = 0;
2139     for (i=0; i<size; i++){ /* i=rank*/
2140       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2141         send_rank[nsends] = i; nsends++;
2142         recv_rank[nrecvs++] = i;
2143       }
2144     }
2145     if (rank >= size - nleftover){/* this proc is a leftover processor */
2146       i = size-nleftover-1;
2147       j = 0;
2148       while (j < nsubcomm - nleftover){
2149         send_rank[nsends++] = i;
2150         i--; j++;
2151       }
2152     }
2153 
2154     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2155       for (i=0; i<nleftover; i++){
2156         recv_rank[nrecvs++] = size-nleftover+i;
2157       }
2158     }
2159 
2160     /* allocate sbuf_j, sbuf_a */
2161     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2162     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2163     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2164   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2165 
2166   /* copy mat's local entries into the buffers */
2167   if (reuse == MAT_INITIAL_MATRIX){
2168     rownz_max = 0;
2169     rptr = sbuf_j;
2170     cols = sbuf_j + rend-rstart + 1;
2171     vals = sbuf_a;
2172     rptr[0] = 0;
2173     for (i=0; i<rend-rstart; i++){
2174       row = i + rstart;
2175       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2176       ncols  = nzA + nzB;
2177       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2178       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2179       /* load the column indices for this row into cols */
2180       lwrite = 0;
2181       for (l=0; l<nzB; l++) {
2182         if ((ctmp = bmap[cworkB[l]]) < cstart){
2183           vals[lwrite]   = aworkB[l];
2184           cols[lwrite++] = ctmp;
2185         }
2186       }
2187       for (l=0; l<nzA; l++){
2188         vals[lwrite]   = aworkA[l];
2189         cols[lwrite++] = cstart + cworkA[l];
2190       }
2191       for (l=0; l<nzB; l++) {
2192         if ((ctmp = bmap[cworkB[l]]) >= cend){
2193           vals[lwrite]   = aworkB[l];
2194           cols[lwrite++] = ctmp;
2195         }
2196       }
2197       vals += ncols;
2198       cols += ncols;
2199       rptr[i+1] = rptr[i] + ncols;
2200       if (rownz_max < ncols) rownz_max = ncols;
2201     }
2202     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2203   } else { /* only copy matrix values into sbuf_a */
2204     rptr = sbuf_j;
2205     vals = sbuf_a;
2206     rptr[0] = 0;
2207     for (i=0; i<rend-rstart; i++){
2208       row = i + rstart;
2209       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2210       ncols  = nzA + nzB;
2211       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2212       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2213       lwrite = 0;
2214       for (l=0; l<nzB; l++) {
2215         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2216       }
2217       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2218       for (l=0; l<nzB; l++) {
2219         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2220       }
2221       vals += ncols;
2222       rptr[i+1] = rptr[i] + ncols;
2223     }
2224   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2225 
2226   /* send nzlocal to others, and recv other's nzlocal */
2227   /*--------------------------------------------------*/
2228   if (reuse == MAT_INITIAL_MATRIX){
2229     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2230     s_waits2 = s_waits3 + nsends;
2231     s_waits1 = s_waits2 + nsends;
2232     r_waits1 = s_waits1 + nsends;
2233     r_waits2 = r_waits1 + nrecvs;
2234     r_waits3 = r_waits2 + nrecvs;
2235   } else {
2236     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2237     r_waits3 = s_waits3 + nsends;
2238   }
2239 
2240   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2241   if (reuse == MAT_INITIAL_MATRIX){
2242     /* get new tags to keep the communication clean */
2243     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2244     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2245     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2246     rbuf_nz = sbuf_nz + nsends;
2247 
2248     /* post receives of other's nzlocal */
2249     for (i=0; i<nrecvs; i++){
2250       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2251     }
2252     /* send nzlocal to others */
2253     for (i=0; i<nsends; i++){
2254       sbuf_nz[i] = nzlocal;
2255       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2256     }
2257     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2258     count = nrecvs;
2259     while (count) {
2260       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2261       recv_rank[imdex] = recv_status.MPI_SOURCE;
2262       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2263       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2264 
2265       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2266       rbuf_nz[imdex] += i + 2;
2267       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2268       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2269       count--;
2270     }
2271     /* wait on sends of nzlocal */
2272     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2273     /* send mat->i,j to others, and recv from other's */
2274     /*------------------------------------------------*/
2275     for (i=0; i<nsends; i++){
2276       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2277       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2278     }
2279     /* wait on receives of mat->i,j */
2280     /*------------------------------*/
2281     count = nrecvs;
2282     while (count) {
2283       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2284       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2285       count--;
2286     }
2287     /* wait on sends of mat->i,j */
2288     /*---------------------------*/
2289     if (nsends) {
2290       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2291     }
2292   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2293 
2294   /* post receives, send and receive mat->a */
2295   /*----------------------------------------*/
2296   for (imdex=0; imdex<nrecvs; imdex++) {
2297     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2298   }
2299   for (i=0; i<nsends; i++){
2300     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2301   }
2302   count = nrecvs;
2303   while (count) {
2304     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2305     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2306     count--;
2307   }
2308   if (nsends) {
2309     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2310   }
2311 
2312   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2313 
2314   /* create redundant matrix */
2315   /*-------------------------*/
2316   if (reuse == MAT_INITIAL_MATRIX){
2317     /* compute rownz_max for preallocation */
2318     for (imdex=0; imdex<nrecvs; imdex++){
2319       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2320       rptr = rbuf_j[imdex];
2321       for (i=0; i<j; i++){
2322         ncols = rptr[i+1] - rptr[i];
2323         if (rownz_max < ncols) rownz_max = ncols;
2324       }
2325     }
2326 
2327     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2328     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2329     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2330     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2331     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2332   } else {
2333     C = *matredundant;
2334   }
2335 
2336   /* insert local matrix entries */
2337   rptr = sbuf_j;
2338   cols = sbuf_j + rend-rstart + 1;
2339   vals = sbuf_a;
2340   for (i=0; i<rend-rstart; i++){
2341     row   = i + rstart;
2342     ncols = rptr[i+1] - rptr[i];
2343     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2344     vals += ncols;
2345     cols += ncols;
2346   }
2347   /* insert received matrix entries */
2348   for (imdex=0; imdex<nrecvs; imdex++){
2349     rstart = rowrange[recv_rank[imdex]];
2350     rend   = rowrange[recv_rank[imdex]+1];
2351     rptr = rbuf_j[imdex];
2352     cols = rbuf_j[imdex] + rend-rstart + 1;
2353     vals = rbuf_a[imdex];
2354     for (i=0; i<rend-rstart; i++){
2355       row   = i + rstart;
2356       ncols = rptr[i+1] - rptr[i];
2357       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2358       vals += ncols;
2359       cols += ncols;
2360     }
2361   }
2362   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2363   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2364   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2365   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2366   if (reuse == MAT_INITIAL_MATRIX){
2367     PetscContainer container;
2368     *matredundant = C;
2369     /* create a supporting struct and attach it to C for reuse */
2370     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2371     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2372     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2373     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2374     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2375 
2376     redund->nzlocal = nzlocal;
2377     redund->nsends  = nsends;
2378     redund->nrecvs  = nrecvs;
2379     redund->send_rank = send_rank;
2380     redund->sbuf_nz = sbuf_nz;
2381     redund->sbuf_j  = sbuf_j;
2382     redund->sbuf_a  = sbuf_a;
2383     redund->rbuf_j  = rbuf_j;
2384     redund->rbuf_a  = rbuf_a;
2385 
2386     redund->MatDestroy = C->ops->destroy;
2387     C->ops->destroy    = MatDestroy_MatRedundant;
2388   }
2389   PetscFunctionReturn(0);
2390 }
2391 
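/*
   Illustrative sketch (added commentary): gathering a complete copy of the distributed
   matrix onto each sub-communicator, as used for redundant preconditioning.  The names
   nsubcomm, subcomm and mlocal_sub are assumptions here; the caller must set up the
   sub-communicator consistently (for instance via MPI_Comm_split() so that roughly
   size/nsubcomm processes share each copy).  A second call with MAT_REUSE_MATRIX reuses
   the communication pattern and buffers when only the values have changed:

     Mat Cred;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
*/
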
2392 #undef __FUNCT__
2393 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2394 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2395 {
2396   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2397   PetscErrorCode ierr;
2398   PetscInt       i,*idxb = 0;
2399   PetscScalar    *va,*vb;
2400   Vec            vtmp;
2401 
2402   PetscFunctionBegin;
2403   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2404   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2405   if (idx) {
2406     for (i=0; i<A->rmap->n; i++) {
2407       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2408     }
2409   }
2410 
2411   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2412   if (idx) {
2413     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2414   }
2415   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2416   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2417 
2418   for (i=0; i<A->rmap->n; i++){
2419     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2420       va[i] = vb[i];
2421       if (idx) idx[i] = a->garray[idxb[i]];
2422     }
2423   }
2424 
2425   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2426   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2427   if (idxb) {
2428     ierr = PetscFree(idxb);CHKERRQ(ierr);
2429   }
2430   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2431   PetscFunctionReturn(0);
2432 }
2433 
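/*
   Illustrative sketch (added commentary): v[i] receives the largest absolute value in
   local row i and, optionally, idx[i] its global column; the diagonal-block result is
   compared entry by entry against the off-diagonal block as shown above.  The vector
   must conform to the row layout, e.g. the left vector from MatGetVecs():

     Vec v;
     ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,PETSC_NULL);CHKERRQ(ierr);
*/
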
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2436 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2437 {
2438   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2439   PetscErrorCode ierr;
2440   PetscInt       i,*idxb = 0;
2441   PetscScalar    *va,*vb;
2442   Vec            vtmp;
2443 
2444   PetscFunctionBegin;
2445   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2446   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2447   if (idx) {
2448     for (i=0; i<A->rmap->n; i++) {
2449       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2450     }
2451   }
2452 
2453   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2454   if (idx) {
2455     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2456   }
2457   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2458   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2459 
2460   for (i=0; i<A->rmap->n; i++){
2461     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2462       va[i] = vb[i];
2463       if (idx) idx[i] = a->garray[idxb[i]];
2464     }
2465   }
2466 
2467   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2468   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2469   if (idxb) {
2470     ierr = PetscFree(idxb);CHKERRQ(ierr);
2471   }
2472   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 #undef __FUNCT__
2477 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2478 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2479 {
2480   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2481   PetscInt       n      = A->rmap->n;
2482   PetscInt       cstart = A->cmap->rstart;
2483   PetscInt      *cmap   = mat->garray;
2484   PetscInt      *diagIdx, *offdiagIdx;
2485   Vec            diagV, offdiagV;
2486   PetscScalar   *a, *diagA, *offdiagA;
2487   PetscInt       r;
2488   PetscErrorCode ierr;
2489 
2490   PetscFunctionBegin;
2491   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2492   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2493   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2494   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2495   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2496   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2497   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2498   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2499   for(r = 0; r < n; ++r) {
2500     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2501       a[r]   = diagA[r];
2502       idx[r] = cstart + diagIdx[r];
2503     } else {
2504       a[r]   = offdiagA[r];
2505       idx[r] = cmap[offdiagIdx[r]];
2506     }
2507   }
2508   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2509   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2510   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2511   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2512   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2513   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 #undef __FUNCT__
2518 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2519 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2520 {
2521   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2522   PetscInt       n      = A->rmap->n;
2523   PetscInt       cstart = A->cmap->rstart;
2524   PetscInt      *cmap   = mat->garray;
2525   PetscInt      *diagIdx, *offdiagIdx;
2526   Vec            diagV, offdiagV;
2527   PetscScalar   *a, *diagA, *offdiagA;
2528   PetscInt       r;
2529   PetscErrorCode ierr;
2530 
2531   PetscFunctionBegin;
2532   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2533   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2534   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2535   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2536   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2537   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2538   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2539   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2540   for(r = 0; r < n; ++r) {
2541     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2542       a[r]   = diagA[r];
2543       idx[r] = cstart + diagIdx[r];
2544     } else {
2545       a[r]   = offdiagA[r];
2546       idx[r] = cmap[offdiagIdx[r]];
2547     }
2548   }
2549   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2550   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2551   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2552   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2553   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2554   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2560 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2561 {
2562   PetscErrorCode ierr;
2563   Mat            *dummy;
2564 
2565   PetscFunctionBegin;
2566   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2567   *newmat = *dummy;
2568   ierr = PetscFree(dummy);CHKERRQ(ierr);
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2573 /* -------------------------------------------------------------------*/
2574 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2575        MatGetRow_MPIAIJ,
2576        MatRestoreRow_MPIAIJ,
2577        MatMult_MPIAIJ,
2578 /* 4*/ MatMultAdd_MPIAIJ,
2579        MatMultTranspose_MPIAIJ,
2580        MatMultTransposeAdd_MPIAIJ,
2581 #ifdef PETSC_HAVE_PBGL
2582        MatSolve_MPIAIJ,
2583 #else
2584        0,
2585 #endif
2586        0,
2587        0,
2588 /*10*/ 0,
2589        0,
2590        0,
2591        MatRelax_MPIAIJ,
2592        MatTranspose_MPIAIJ,
2593 /*15*/ MatGetInfo_MPIAIJ,
2594        MatEqual_MPIAIJ,
2595        MatGetDiagonal_MPIAIJ,
2596        MatDiagonalScale_MPIAIJ,
2597        MatNorm_MPIAIJ,
2598 /*20*/ MatAssemblyBegin_MPIAIJ,
2599        MatAssemblyEnd_MPIAIJ,
2600        MatSetOption_MPIAIJ,
2601        MatZeroEntries_MPIAIJ,
2602 /*24*/ MatZeroRows_MPIAIJ,
2603        0,
2604 #ifdef PETSC_HAVE_PBGL
2605        0,
2606 #else
2607        0,
2608 #endif
2609        0,
2610        0,
2611 /*29*/ MatSetUpPreallocation_MPIAIJ,
2612 #ifdef PETSC_HAVE_PBGL
2613        0,
2614 #else
2615        0,
2616 #endif
2617        0,
2618        0,
2619        0,
2620 /*34*/ MatDuplicate_MPIAIJ,
2621        0,
2622        0,
2623        0,
2624        0,
2625 /*39*/ MatAXPY_MPIAIJ,
2626        MatGetSubMatrices_MPIAIJ,
2627        MatIncreaseOverlap_MPIAIJ,
2628        MatGetValues_MPIAIJ,
2629        MatCopy_MPIAIJ,
2630 /*44*/ MatGetRowMax_MPIAIJ,
2631        MatScale_MPIAIJ,
2632        0,
2633        0,
2634        0,
2635 /*49*/ MatSetBlockSize_MPIAIJ,
2636        0,
2637        0,
2638        0,
2639        0,
2640 /*54*/ MatFDColoringCreate_MPIAIJ,
2641        0,
2642        MatSetUnfactored_MPIAIJ,
2643        MatPermute_MPIAIJ,
2644        0,
2645 /*59*/ MatGetSubMatrix_MPIAIJ,
2646        MatDestroy_MPIAIJ,
2647        MatView_MPIAIJ,
2648        0,
2649        0,
2650 /*64*/ 0,
2651        0,
2652        0,
2653        0,
2654        0,
2655 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2656        MatGetRowMinAbs_MPIAIJ,
2657        0,
2658        MatSetColoring_MPIAIJ,
2659 #if defined(PETSC_HAVE_ADIC)
2660        MatSetValuesAdic_MPIAIJ,
2661 #else
2662        0,
2663 #endif
2664        MatSetValuesAdifor_MPIAIJ,
2665 /*75*/ MatFDColoringApply_AIJ,
2666        0,
2667        0,
2668        0,
2669        0,
2670 /*80*/ 0,
2671        0,
2672        0,
2673 /*83*/ MatLoad_MPIAIJ,
2674        0,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2680        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2681        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2682        MatPtAP_Basic,
2683        MatPtAPSymbolic_MPIAIJ,
2684 /*94*/ MatPtAPNumeric_MPIAIJ,
2685        0,
2686        0,
2687        0,
2688        0,
2689 /*99*/ 0,
2690        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2691        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2692        MatConjugate_MPIAIJ,
2693        0,
2694 /*104*/MatSetValuesRow_MPIAIJ,
2695        MatRealPart_MPIAIJ,
2696        MatImaginaryPart_MPIAIJ,
2697        0,
2698        0,
2699 /*109*/0,
2700        MatGetRedundantMatrix_MPIAIJ,
2701        MatGetRowMin_MPIAIJ,
2702        0,
2703        0,
2704 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2705        0,
2706        0,
2707        0,
2708        0,
2709        0
2710 };
2711 
2712 /* ----------------------------------------------------------------------------------------*/
2713 
2714 EXTERN_C_BEGIN
2715 #undef __FUNCT__
2716 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2717 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2718 {
2719   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2720   PetscErrorCode ierr;
2721 
2722   PetscFunctionBegin;
2723   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2724   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2725   PetscFunctionReturn(0);
2726 }
2727 EXTERN_C_END
2728 
2729 EXTERN_C_BEGIN
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2732 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2733 {
2734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2735   PetscErrorCode ierr;
2736 
2737   PetscFunctionBegin;
2738   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2739   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2740   PetscFunctionReturn(0);
2741 }
2742 EXTERN_C_END
2743 
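/*
   Illustrative sketch (added commentary): MatStoreValues()/MatRetrieveValues() simply
   forward to the two sequential blocks.  The intended pattern, once the nonzero
   structure has been assembled and frozen, is to save the values, overwrite them (for
   example while building a related matrix with the same structure), and restore them:

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     ierr = MatZeroEntries(A);CHKERRQ(ierr);
        ... set new values, reassemble, solve ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/
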
2744 #include "petscpc.h"
2745 EXTERN_C_BEGIN
2746 #undef __FUNCT__
2747 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2748 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2749 {
2750   Mat_MPIAIJ     *b;
2751   PetscErrorCode ierr;
2752   PetscInt       i;
2753 
2754   PetscFunctionBegin;
2755   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2756   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2757   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2758   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2759 
2760   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2761   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2762   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2763   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2764   if (d_nnz) {
2765     for (i=0; i<B->rmap->n; i++) {
2766       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2767     }
2768   }
2769   if (o_nnz) {
2770     for (i=0; i<B->rmap->n; i++) {
2771       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2772     }
2773   }
2774   b = (Mat_MPIAIJ*)B->data;
2775 
2776   if (!B->preallocated) {
2777     /* Explicitly create 2 MATSEQAIJ matrices. */
2778     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2779     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2780     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2781     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2782     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2783     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2784     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2785     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2786   }
2787 
2788   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2789   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2790   B->preallocated = PETSC_TRUE;
2791   PetscFunctionReturn(0);
2792 }
2793 EXTERN_C_END
2794 
2795 #undef __FUNCT__
2796 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2797 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2798 {
2799   Mat            mat;
2800   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2801   PetscErrorCode ierr;
2802 
2803   PetscFunctionBegin;
2804   *newmat       = 0;
2805   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2806   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2807   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2808   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2809   a    = (Mat_MPIAIJ*)mat->data;
2810 
2811   mat->factor       = matin->factor;
2812   mat->rmap->bs      = matin->rmap->bs;
2813   mat->assembled    = PETSC_TRUE;
2814   mat->insertmode   = NOT_SET_VALUES;
2815   mat->preallocated = PETSC_TRUE;
2816 
2817   a->size           = oldmat->size;
2818   a->rank           = oldmat->rank;
2819   a->donotstash     = oldmat->donotstash;
2820   a->roworiented    = oldmat->roworiented;
2821   a->rowindices     = 0;
2822   a->rowvalues      = 0;
2823   a->getrowactive   = PETSC_FALSE;
2824 
2825   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2826   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2827 
2828   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2829   if (oldmat->colmap) {
2830 #if defined (PETSC_USE_CTABLE)
2831     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2832 #else
2833     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2834     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2835     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2836 #endif
2837   } else a->colmap = 0;
2838   if (oldmat->garray) {
2839     PetscInt len;
2840     len  = oldmat->B->cmap->n;
2841     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2842     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2843     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2844   } else a->garray = 0;
2845 
2846   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2847   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2848   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2849   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2850   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2851   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2852   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2853   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2854   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2855   *newmat = mat;
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 #include "petscsys.h"
2860 
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatLoad_MPIAIJ"
2863 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2864 {
2865   Mat            A;
2866   PetscScalar    *vals,*svals;
2867   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2868   MPI_Status     status;
2869   PetscErrorCode ierr;
2870   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2871   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2872   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2873   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2874   PetscInt       cend,cstart,n,*rowners;
2875   int            fd;
2876 
2877   PetscFunctionBegin;
2878   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2879   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2880   if (!rank) {
2881     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2882     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2883     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2884   }
2885 
2886   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2887   M = header[1]; N = header[2];
2888   /* determine ownership of all rows */
2889   m    = M/size + ((M % size) > rank);
2890   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2891   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2892 
2893   /* First process needs enough room for process with most rows */
2894   if (!rank) {
2895     mmax       = rowners[1];
2896     for (i=2; i<=size; i++) {   /* rowners[1..size] hold every process's row count */
2897       mmax = PetscMax(mmax,rowners[i]);
2898     }
2899   } else mmax = m;
2900 
2901   rowners[0] = 0;
2902   for (i=2; i<=size; i++) {
2903     rowners[i] += rowners[i-1];
2904   }
2905   rstart = rowners[rank];
2906   rend   = rowners[rank+1];
2907 
2908   /* distribute row lengths to all processors */
2909   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2910   if (!rank) {
2911     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2912     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2913     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2914     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2915     for (j=0; j<m; j++) {
2916       procsnz[0] += ourlens[j];
2917     }
2918     for (i=1; i<size; i++) {
2919       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2920       /* calculate the number of nonzeros on each processor */
2921       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2922         procsnz[i] += rowlengths[j];
2923       }
2924       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2925       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2926     }
2927     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2928   } else {
2929     mpicnt = PetscMPIIntCast(m);
2930     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2931   }
2932 
2933   if (!rank) {
2934     /* determine max buffer needed and allocate it */
2935     maxnz = 0;
2936     for (i=0; i<size; i++) {
2937       maxnz = PetscMax(maxnz,procsnz[i]);
2938     }
2939     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2940 
2941     /* read in my part of the matrix column indices  */
2942     nz   = procsnz[0];
2943     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2944     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2945 
2946     /* read in everyone else's parts and ship them off */
2947     for (i=1; i<size; i++) {
2948       nz     = procsnz[i];
2949       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2950       mpicnt = PetscMPIIntCast(nz);
2951       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2952     }
2953     ierr = PetscFree(cols);CHKERRQ(ierr);
2954   } else {
2955     /* determine buffer space needed for message */
2956     nz = 0;
2957     for (i=0; i<m; i++) {
2958       nz += ourlens[i];
2959     }
2960     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2961 
2962     /* receive message of column indices*/
2963     mpicnt = PetscMPIIntCast(nz);
2964     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2965     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2966     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2967     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2968     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2969   }
2970 
2971   /* determine column ownership if matrix is not square */
2972   if (N != M) {
2973     n      = N/size + ((N % size) > rank);
2974     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2975     cstart = cend - n;
2976   } else {
2977     cstart = rstart;
2978     cend   = rend;
2979     n      = cend - cstart;
2980   }
2981 
2982   /* loop over local rows, determining number of off diagonal entries */
2983   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2984   jj = 0;
2985   for (i=0; i<m; i++) {
2986     for (j=0; j<ourlens[i]; j++) {
2987       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2988       jj++;
2989     }
2990   }
2991 
2992   /* create our matrix */
2993   for (i=0; i<m; i++) {
2994     ourlens[i] -= offlens[i];
2995   }
2996   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2997   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2998   ierr = MatSetType(A,type);CHKERRQ(ierr);
2999   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3000 
3001   for (i=0; i<m; i++) {
3002     ourlens[i] += offlens[i];
3003   }
3004 
3005   if (!rank) {
3006     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3007 
3008     /* read in my part of the matrix numerical values  */
3009     nz   = procsnz[0];
3010     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3011 
3012     /* insert into matrix */
3013     jj      = rstart;
3014     smycols = mycols;
3015     svals   = vals;
3016     for (i=0; i<m; i++) {
3017       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3018       smycols += ourlens[i];
3019       svals   += ourlens[i];
3020       jj++;
3021     }
3022 
3023     /* read in other processors and ship out */
3024     for (i=1; i<size; i++) {
3025       nz     = procsnz[i];
3026       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3027       mpicnt = PetscMPIIntCast(nz);
3028       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3029     }
3030     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3031   } else {
3032     /* receive numeric values */
3033     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3034 
3035     /* receive message of values*/
3036     mpicnt = PetscMPIIntCast(nz);
3037     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3038     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3039     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3040     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3041     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3042 
3043     /* insert into matrix */
3044     jj      = rstart;
3045     smycols = mycols;
3046     svals   = vals;
3047     for (i=0; i<m; i++) {
3048       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3049       smycols += ourlens[i];
3050       svals   += ourlens[i];
3051       jj++;
3052     }
3053   }
3054   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3055   ierr = PetscFree(vals);CHKERRQ(ierr);
3056   ierr = PetscFree(mycols);CHKERRQ(ierr);
3057   ierr = PetscFree(rowners);CHKERRQ(ierr);
3058 
3059   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061   *newmat = A;
3062   PetscFunctionReturn(0);
3063 }
3064 
3065 #undef __FUNCT__
3066 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3067 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3068 {
3069   PetscErrorCode ierr;
3070   IS             iscol_local;
3071   PetscInt       csize;
3072 
3073   PetscFunctionBegin;
3074   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3075   if (call == MAT_REUSE_MATRIX) {
3076     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3077     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3078   } else {
3079     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3080   }
3081   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3082   if (call == MAT_INITIAL_MATRIX) {
3083     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3084     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3085   }
3086   PetscFunctionReturn(0);
3087 }
3088 
3089 #undef __FUNCT__
3090 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3091 /*
3092     Not great since it makes two copies of the submatrix: first a SeqAIJ
3093   copy on each process, and then the final result by concatenating those local
3094   matrices. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
3095 
3096   Note: This requires a sequential iscol with all indices.
3097 */
3098 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3099 {
3100   PetscErrorCode ierr;
3101   PetscMPIInt    rank,size;
3102   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3103   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3104   Mat            *local,M,Mreuse;
3105   MatScalar      *vwork,*aa;
3106   MPI_Comm       comm = ((PetscObject)mat)->comm;
3107   Mat_SeqAIJ     *aij;
3108 
3109 
3110   PetscFunctionBegin;
3111   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3112   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3113 
3114   if (call ==  MAT_REUSE_MATRIX) {
3115     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3116     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3117     local = &Mreuse;
3118     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3119   } else {
3120     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3121     Mreuse = *local;
3122     ierr   = PetscFree(local);CHKERRQ(ierr);
3123   }
3124 
3125   /*
3126       m - number of local rows
3127       n - number of columns (same on all processors)
3128       rstart - first row in new global matrix generated
3129   */
3130   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3131   if (call == MAT_INITIAL_MATRIX) {
3132     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3133     ii  = aij->i;
3134     jj  = aij->j;
3135 
3136     /*
3137         Determine the number of non-zeros in the diagonal and off-diagonal
3138         portions of the matrix in order to do correct preallocation
3139     */
3140 
3141     /* first get start and end of "diagonal" columns */
3142     if (csize == PETSC_DECIDE) {
3143       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3144       if (mglobal == n) { /* square matrix */
3145 	nlocal = m;
3146       } else {
3147         nlocal = n/size + ((n % size) > rank);
3148       }
3149     } else {
3150       nlocal = csize;
3151     }
3152     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3153     rstart = rend - nlocal;
3154     if (rank == size - 1 && rend != n) {
3155       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3156     }
3157 
3158     /* next, compute all the lengths */
3159     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3160     olens = dlens + m;
3161     for (i=0; i<m; i++) {
3162       jend = ii[i+1] - ii[i];
3163       olen = 0;
3164       dlen = 0;
3165       for (j=0; j<jend; j++) {
3166         if (*jj < rstart || *jj >= rend) olen++;
3167         else dlen++;
3168         jj++;
3169       }
3170       olens[i] = olen;
3171       dlens[i] = dlen;
3172     }
3173     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3174     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3175     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3176     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3177     ierr = PetscFree(dlens);CHKERRQ(ierr);
3178   } else {
3179     PetscInt ml,nl;
3180 
3181     M = *newmat;
3182     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3183     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3184     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3185     /*
3186          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3187        rather than the slower MatSetValues().
3188     */
3189     M->was_assembled = PETSC_TRUE;
3190     M->assembled     = PETSC_FALSE;
3191   }
3192   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3193   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3194   ii  = aij->i;
3195   jj  = aij->j;
3196   aa  = aij->a;
3197   for (i=0; i<m; i++) {
3198     row   = rstart + i;
3199     nz    = ii[i+1] - ii[i];
3200     cwork = jj;     jj += nz;
3201     vwork = aa;     aa += nz;
3202     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3203   }
3204 
3205   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3206   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3207   *newmat = M;
3208 
3209   /* save submatrix used in processor for next request */
3210   if (call ==  MAT_INITIAL_MATRIX) {
3211     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3212     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3213   }
3214 
3215   PetscFunctionReturn(0);
3216 }
3217 
3218 EXTERN_C_BEGIN
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3221 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3222 {
3223   PetscInt       m,cstart, cend,j,nnz,i,d;
3224   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3225   const PetscInt *JJ;
3226   PetscScalar    *values;
3227   PetscErrorCode ierr;
3228 
3229   PetscFunctionBegin;
3230   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3231 
3232   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3233   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3234   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3235   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3236   m      = B->rmap->n;
3237   cstart = B->cmap->rstart;
3238   cend   = B->cmap->rend;
3239   rstart = B->rmap->rstart;
3240 
3241   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3242   o_nnz = d_nnz + m;
3243 
3244 #if defined(PETSC_USE_DEBUG)
3245   for (i=0; i<m; i++) {
3246     nnz     = Ii[i+1]- Ii[i];
3247     JJ      = J + Ii[i];
3248     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3249     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3250     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3251     for (j=1; j<nnz; j++) {
3252       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3253     }
3254   }
3255 #endif
3256 
3257   for (i=0; i<m; i++) {
3258     nnz     = Ii[i+1]- Ii[i];
3259     JJ      = J + Ii[i];
3260     nnz_max = PetscMax(nnz_max,nnz);
3261     for (j=0; j<nnz; j++) {
3262       if (*JJ >= cstart) break;
3263       JJ++;
3264     }
3265     d = 0;
3266     for (; j<nnz; j++) {
3267       if (*JJ++ >= cend) break;
3268       d++;
3269     }
3270     d_nnz[i] = d;
3271     o_nnz[i] = nnz - d;
3272   }
3273   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3274   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3275 
3276   if (v) values = (PetscScalar*)v;
3277   else {
3278     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3279     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3280   }
3281 
3282   for (i=0; i<m; i++) {
3283     ii   = i + rstart;
3284     nnz  = Ii[i+1]- Ii[i];
3285     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3286   }
3287   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289 
3290   if (!v) {
3291     ierr = PetscFree(values);CHKERRQ(ierr);
3292   }
3293   PetscFunctionReturn(0);
3294 }
3295 EXTERN_C_END
3296 
3297 #undef __FUNCT__
3298 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3299 /*@
3300    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3301    (the default parallel PETSc format).
3302 
3303    Collective on MPI_Comm
3304 
3305    Input Parameters:
3306 +  B - the matrix
3307 .  i - the indices into j for the start of each local row (starts with zero)
3308 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3309 -  v - optional values in the matrix
3310 
3311    Level: developer
3312 
3313    Notes:
3314        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3315      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3316      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3317 
3318        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3319 
3320        The format used for the sparse matrix input is equivalent to a
3321     row-major ordering, i.e., for the following matrix the expected input data is
3322     as shown:
3323 
3324         1 0 0
3325         2 0 3     P0
3326        -------
3327         4 5 6     P1
3328 
3329      Process0 [P0]: rows_owned=[0,1]
3330         i =  {0,1,3}  [size = nrow+1  = 2+1]
3331         j =  {0,0,2}  [size = nz = 3]
3332         v =  {1,2,3}  [size = nz = 3]
3333 
3334      Process1 [P1]: rows_owned=[2]
3335         i =  {0,3}    [size = nrow+1  = 1+1]
3336         j =  {0,1,2}  [size = nz = 3]
3337         v =  {4,5,6}  [size = nz = 3]
3338 
3339       The column indices for each row MUST be sorted.
3340 
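      A minimal sketch using the P0 arrays from the example above (the matrix B is assumed
    to have already been created with MatCreate()/MatSetSizes()/MatSetType(), and error
    checking is abbreviated):

.vb
    PetscInt    i[] = {0,1,3};         /* row offsets for the 2 local rows       */
    PetscInt    j[] = {0,0,2};         /* sorted column indices, one run per row */
    PetscScalar v[] = {1.0,2.0,3.0};   /* values matching j[]                    */

    ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve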
3341 .keywords: matrix, aij, compressed row, sparse, parallel
3342 
3343 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3344           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3345 @*/
3346 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3347 {
3348   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3349 
3350   PetscFunctionBegin;
3351   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3352   if (f) {
3353     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3354   }
3355   PetscFunctionReturn(0);
3356 }
3357 
3358 #undef __FUNCT__
3359 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3360 /*@C
3361    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3362    (the default parallel PETSc format).  For good matrix assembly performance
3363    the user should preallocate the matrix storage by setting the parameters
3364    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3365    performance can be increased by more than a factor of 50.
3366 
3367    Collective on MPI_Comm
3368 
3369    Input Parameters:
3370 +  A - the matrix
3371 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3372            (same value is used for all local rows)
3373 .  d_nnz - array containing the number of nonzeros in the various rows of the
3374            DIAGONAL portion of the local submatrix (possibly different for each row)
3375            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3376            The size of this array is equal to the number of local rows, i.e 'm'.
3377            You must leave room for the diagonal entry even if it is zero.
3378 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3379            submatrix (same value is used for all local rows).
3380 -  o_nnz - array containing the number of nonzeros in the various rows of the
3381            OFF-DIAGONAL portion of the local submatrix (possibly different for
3382            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3383            structure. The size of this array is equal to the number
3384            of local rows, i.e 'm'.
3385 
3386    If the *_nnz parameter is given then the *_nz parameter is ignored
3387 
3388    The AIJ format (also called the Yale sparse matrix format or
3389    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3390    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3391 
3392    The parallel matrix is partitioned such that the first m0 rows belong to
3393    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3394    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3395 
3396    The DIAGONAL portion of the local submatrix of a processor can be defined
3397    as the submatrix obtained by extracting the part corresponding
3398    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3399    first row that belongs to the processor and r2 is the last row belonging
3400    to this processor. This is a square m x m matrix. The remaining portion
3401    of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3402 
3403    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3404 
3405    You can call MatGetInfo() to get information on how effective the preallocation was,
3406    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3407    You can also run with the option -info and look for messages with the string
3408    malloc in them to see if additional memory allocation was needed.
3409 
3410    Example usage:
3411 
3412    Consider the following 8x8 matrix with 34 nonzero values that is
3413    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3414    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3415    as follows:
3416 
3417 .vb
3418             1  2  0  |  0  3  0  |  0  4
3419     Proc0   0  5  6  |  7  0  0  |  8  0
3420             9  0 10  | 11  0  0  | 12  0
3421     -------------------------------------
3422            13  0 14  | 15 16 17  |  0  0
3423     Proc1   0 18  0  | 19 20 21  |  0  0
3424             0  0  0  | 22 23  0  | 24  0
3425     -------------------------------------
3426     Proc2  25 26 27  |  0  0 28  | 29  0
3427            30  0  0  | 31 32 33  |  0 34
3428 .ve
3429 
3430    This can be represented as a collection of submatrices as:
3431 
3432 .vb
3433       A B C
3434       D E F
3435       G H I
3436 .ve
3437 
3438    Where the submatrices A,B,C are owned by proc0, D,E,F are
3439    owned by proc1, G,H,I are owned by proc2.
3440 
3441    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3442    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3443    The 'M','N' parameters are 8,8, and have the same values on all procs.
3444 
3445    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3446    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3447    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3448    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3449    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3450    matrix, and [DF] as another SeqAIJ matrix.
3451 
3452    When d_nz, o_nz parameters are specified, d_nz storage elements are
3453    allocated for every row of the local diagonal submatrix, and o_nz
3454    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3455    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3456    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3457    In this case, the values of d_nz,o_nz are:
3458 .vb
3459      proc0 : dnz = 2, o_nz = 2
3460      proc1 : dnz = 3, o_nz = 2
3461      proc2 : dnz = 1, o_nz = 4
3462 .ve
3463    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3464    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3465    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3466    34 values.
3467 
3468    When the d_nnz, o_nnz parameters are specified, the storage is specified
3469    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3470    In the above case the values for d_nnz,o_nnz are:
3471 .vb
3472      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3473      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3474      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3475 .ve
3476    Here the space allocated is the sum of all the above values, i.e., 34, and
3477    hence the preallocation is exact.
3478 
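   A minimal sketch of calling this routine for proc1 in the example above, using the
   per-row d_nnz/o_nnz arrays (comm and ierr are assumed to exist, error checking is
   abbreviated, and each process would pass its own local arrays):

.vb
   Mat      A;
   PetscInt d_nnz[] = {3,3,2};   /* nonzeros per local row, DIAGONAL block     */
   PetscInt o_nnz[] = {2,1,1};   /* nonzeros per local row, OFF-DIAGONAL block */

   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve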
3479    Level: intermediate
3480 
3481 .keywords: matrix, aij, compressed row, sparse, parallel
3482 
3483 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3484           MPIAIJ, MatGetInfo()
3485 @*/
3486 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3487 {
3488   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3489 
3490   PetscFunctionBegin;
3491   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3492   if (f) {
3493     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3494   }
3495   PetscFunctionReturn(0);
3496 }
3497 
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3500 /*@
3501      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3502          rows in standard CSR format.
3503 
3504    Collective on MPI_Comm
3505 
3506    Input Parameters:
3507 +  comm - MPI communicator
3508 .  m - number of local rows (Cannot be PETSC_DECIDE)
3509 .  n - This value should be the same as the local size used in creating the
3510        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3511        calculated if N is given) For square matrices n is almost always m.
3512 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3513 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3514 .   i - row indices
3515 .   j - column indices
3516 -   a - matrix values
3517 
3518    Output Parameter:
3519 .   mat - the matrix
3520 
3521    Level: intermediate
3522 
3523    Notes:
3524        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3525      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3526      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3527 
3528        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3529 
3530        The format used for the sparse matrix input is equivalent to a
3531     row-major ordering, i.e., for the following matrix the expected input data is
3532     as shown:
3533 
3534         1 0 0
3535         2 0 3     P0
3536        -------
3537         4 5 6     P1
3538 
3539      Process0 [P0]: rows_owned=[0,1]
3540         i =  {0,1,3}  [size = nrow+1  = 2+1]
3541         j =  {0,0,2}  [size = nz = 3]
3542         v =  {1,2,3}  [size = nz = 3]
3543 
3544      Process1 [P1]: rows_owned=[2]
3545         i =  {0,3}    [size = nrow+1  = 1+1]
3546         j =  {0,1,2}  [size = nz = 3]
3547         v =  {4,5,6}  [size = nz = 3]
3548 
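   A minimal sketch for P0 in the example above (2 local rows of the global 3x3 matrix;
   the local column size is left to PETSC_DECIDE, comm and ierr are assumed to exist,
   and error checking is abbreviated):

.vb
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar a[] = {1.0,2.0,3.0};
   Mat         A;

   ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve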
3549 .keywords: matrix, aij, compressed row, sparse, parallel
3550 
3551 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3552           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3553 @*/
3554 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3555 {
3556   PetscErrorCode ierr;
3557 
3558   PetscFunctionBegin;
3559   if (i[0]) {
3560     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3561   }
3562   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3563   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3564   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3565   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3566   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3567   PetscFunctionReturn(0);
3568 }
3569 
3570 #undef __FUNCT__
3571 #define __FUNCT__ "MatCreateMPIAIJ"
3572 /*@C
3573    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3574    (the default parallel PETSc format).  For good matrix assembly performance
3575    the user should preallocate the matrix storage by setting the parameters
3576    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3577    performance can be increased by more than a factor of 50.
3578 
3579    Collective on MPI_Comm
3580 
3581    Input Parameters:
3582 +  comm - MPI communicator
3583 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3584            This value should be the same as the local size used in creating the
3585            y vector for the matrix-vector product y = Ax.
3586 .  n - This value should be the same as the local size used in creating the
3587        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3588        calculated if N is given) For square matrices n is almost always m.
3589 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3590 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3591 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3592            (same value is used for all local rows)
3593 .  d_nnz - array containing the number of nonzeros in the various rows of the
3594            DIAGONAL portion of the local submatrix (possibly different for each row)
3595            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3596            The size of this array is equal to the number of local rows, i.e 'm'.
3597            You must leave room for the diagonal entry even if it is zero.
3598 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3599            submatrix (same value is used for all local rows).
3600 -  o_nnz - array containing the number of nonzeros in the various rows of the
3601            OFF-DIAGONAL portion of the local submatrix (possibly different for
3602            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3603            structure. The size of this array is equal to the number
3604            of local rows, i.e 'm'.
3605 
3606    Output Parameter:
3607 .  A - the matrix
3608 
3609    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3610    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3611    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3612 
3613    Notes:
3614    If the *_nnz parameter is given then the *_nz parameter is ignored
3615 
3616    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3617    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3618    storage requirements for this matrix.
3619 
3620    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3621    processor then it must be used on all processors that share the object for
3622    that argument.
3623 
3624    The user MUST specify either the local or global matrix dimensions
3625    (possibly both).
3626 
3627    The parallel matrix is partitioned across processors such that the
3628    first m0 rows belong to process 0, the next m1 rows belong to
3629    process 1, the next m2 rows belong to process 2, etc., where
3630    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
3631    values corresponding to an [m x N] submatrix.
3632 
3633    The columns are logically partitioned with the n0 columns belonging
3634    to the 0th partition, the next n1 columns belonging to the next
3635    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3636 
3637    The DIAGONAL portion of the local submatrix on any given processor
3638    is the submatrix corresponding to the rows and columns m,n
3639    owned by the given processor, i.e., the diagonal matrix on
3640    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3641    etc. The remaining portion of the local submatrix [m x (N-n)]
3642    constitutes the OFF-DIAGONAL portion. The example below better
3643    illustrates this concept.
3644 
3645    For a square global matrix we define each processor's diagonal portion
3646    to be its local rows and the corresponding columns (a square submatrix);
3647    each processor's off-diagonal portion encompasses the remainder of the
3648    local matrix (a rectangular submatrix).
3649 
3650    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3651 
3652    When calling this routine with a single process communicator, a matrix of
3653    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3654    type of communicator, use the construction mechanism:
3655      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3656 
3657    By default, this format uses inodes (identical nodes) when possible.
3658    We search for consecutive rows with the same nonzero structure, thereby
3659    reusing matrix information to achieve increased efficiency.
3660 
3661    Options Database Keys:
3662 +  -mat_no_inode  - Do not use inodes
3663 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3664 -  -mat_aij_oneindex - Internally use indexing starting at 1
3665         rather than 0.  Note that when calling MatSetValues(),
3666         the user still MUST index entries starting at 0!
3667 
3668 
3669    Example usage:
3670 
3671    Consider the following 8x8 matrix with 34 nonzero values that is
3672    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3673    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3674    as follows:
3675 
3676 .vb
3677             1  2  0  |  0  3  0  |  0  4
3678     Proc0   0  5  6  |  7  0  0  |  8  0
3679             9  0 10  | 11  0  0  | 12  0
3680     -------------------------------------
3681            13  0 14  | 15 16 17  |  0  0
3682     Proc1   0 18  0  | 19 20 21  |  0  0
3683             0  0  0  | 22 23  0  | 24  0
3684     -------------------------------------
3685     Proc2  25 26 27  |  0  0 28  | 29  0
3686            30  0  0  | 31 32 33  |  0 34
3687 .ve
3688 
3689    This can be represented as a collection of submatrices as:
3690 
3691 .vb
3692       A B C
3693       D E F
3694       G H I
3695 .ve
3696 
3697    Where the submatrices A,B,C are owned by proc0, D,E,F are
3698    owned by proc1, G,H,I are owned by proc2.
3699 
3700    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3701    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3702    The 'M','N' parameters are 8,8, and have the same values on all procs.
3703 
3704    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3705    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3706    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3707    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3708    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3709    matrix, and [DF] as another SeqAIJ matrix.
3710 
3711    When d_nz, o_nz parameters are specified, d_nz storage elements are
3712    allocated for every row of the local diagonal submatrix, and o_nz
3713    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3714    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3715    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3716    In this case, the values of d_nz,o_nz are:
3717 .vb
3718      proc0 : dnz = 2, o_nz = 2
3719      proc1 : dnz = 3, o_nz = 2
3720      proc2 : dnz = 1, o_nz = 4
3721 .ve
3722    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3723    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3724    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3725    34 values.
3726 
3727    When the d_nnz, o_nnz parameters are specified, the storage is specified
3728    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3729    In the above case the values for d_nnz,o_nnz are:
3730 .vb
3731      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3732      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3733      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3734 .ve
3735    Here the space allocated is the sum of all the above values, i.e., 34, and
3736    hence the preallocation is exact.
3737 
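   A minimal sketch of calling this routine directly for proc0 in the example above,
   using the per-row arrays (each process passes its own local values; comm and ierr
   are assumed to exist and error checking is abbreviated):

.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};

   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve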
3738    Level: intermediate
3739 
3740 .keywords: matrix, aij, compressed row, sparse, parallel
3741 
3742 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3743           MPIAIJ, MatCreateMPIAIJWithArrays()
3744 @*/
3745 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3746 {
3747   PetscErrorCode ierr;
3748   PetscMPIInt    size;
3749 
3750   PetscFunctionBegin;
3751   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3752   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3753   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3754   if (size > 1) {
3755     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3756     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3757   } else {
3758     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3759     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3760   }
3761   PetscFunctionReturn(0);
3762 }
3763 
3764 #undef __FUNCT__
3765 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3766 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3767 {
3768   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3769 
3770   PetscFunctionBegin;
3771   *Ad     = a->A;
3772   *Ao     = a->B;
3773   *colmap = a->garray;
3774   PetscFunctionReturn(0);
3775 }
3776 
3777 #undef __FUNCT__
3778 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3779 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3780 {
3781   PetscErrorCode ierr;
3782   PetscInt       i;
3783   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3784 
3785   PetscFunctionBegin;
3786   if (coloring->ctype == IS_COLORING_GLOBAL) {
3787     ISColoringValue *allcolors,*colors;
3788     ISColoring      ocoloring;
3789 
3790     /* set coloring for diagonal portion */
3791     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3792 
3793     /* set coloring for off-diagonal portion */
3794     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3795     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3796     for (i=0; i<a->B->cmap->n; i++) {
3797       colors[i] = allcolors[a->garray[i]];
3798     }
3799     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3800     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3801     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3802     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3803   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3804     ISColoringValue *colors;
3805     PetscInt        *larray;
3806     ISColoring      ocoloring;
3807 
3808     /* set coloring for diagonal portion */
3809     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3810     for (i=0; i<a->A->cmap->n; i++) {
3811       larray[i] = i + A->cmap->rstart;
3812     }
3813     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3814     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3815     for (i=0; i<a->A->cmap->n; i++) {
3816       colors[i] = coloring->colors[larray[i]];
3817     }
3818     ierr = PetscFree(larray);CHKERRQ(ierr);
3819     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3820     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3821     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3822 
3823     /* set coloring for off-diagonal portion */
3824     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3825     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3826     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3827     for (i=0; i<a->B->cmap->n; i++) {
3828       colors[i] = coloring->colors[larray[i]];
3829     }
3830     ierr = PetscFree(larray);CHKERRQ(ierr);
3831     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3832     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3833     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3834   } else {
3835     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3836   }
3837 
3838   PetscFunctionReturn(0);
3839 }
3840 
3841 #if defined(PETSC_HAVE_ADIC)
3842 #undef __FUNCT__
3843 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3844 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3845 {
3846   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3847   PetscErrorCode ierr;
3848 
3849   PetscFunctionBegin;
3850   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3851   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3852   PetscFunctionReturn(0);
3853 }
3854 #endif
3855 
3856 #undef __FUNCT__
3857 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3858 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3859 {
3860   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3861   PetscErrorCode ierr;
3862 
3863   PetscFunctionBegin;
3864   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3865   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3866   PetscFunctionReturn(0);
3867 }
3868 
3869 #undef __FUNCT__
3870 #define __FUNCT__ "MatMerge"
3871 /*@
3872       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3873                  matrices from each processor
3874 
3875     Collective on MPI_Comm
3876 
3877    Input Parameters:
3878 +    comm - the communicator the parallel matrix will live on
3879 .    inmat - the input sequential matrix on each process (destroyed by this routine)
3880 .    n - number of local columns (or PETSC_DECIDE)
3881 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3882 
3883    Output Parameter:
3884 .    outmat - the parallel matrix generated
3885 
3886     Level: advanced
3887 
3888    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3889 
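   A minimal sketch (each process passes its own assembled sequential matrix; the names
   are illustrative and error checking is abbreviated):

.vb
   Mat seqA;   /* a SeqAIJ matrix assembled independently on each process */
   Mat mpiA;

   ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
   /* note: MatMerge() calls MatDestroy() on seqA, so only mpiA remains valid */
.ve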
3890 @*/
3891 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3892 {
3893   PetscErrorCode ierr;
3894   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3895   PetscInt       *indx;
3896   PetscScalar    *values;
3897 
3898   PetscFunctionBegin;
3899   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3900   if (scall == MAT_INITIAL_MATRIX){
3901     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3902     if (n == PETSC_DECIDE){
3903       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3904     }
3905     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3906     rstart -= m;
3907 
3908     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3909     for (i=0;i<m;i++) {
3910       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3911       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3912       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3913     }
3914     /* This routine will ONLY return MPIAIJ type matrix */
3915     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3916     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3917     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3918     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3919     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3920 
3921   } else if (scall == MAT_REUSE_MATRIX){
3922     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3923   } else {
3924     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3925   }
3926 
3927   for (i=0;i<m;i++) {
3928     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3929     Ii    = i + rstart;
3930     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3931     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3932   }
3933   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3934   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3935   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3936 
3937   PetscFunctionReturn(0);
3938 }
3939 
3940 #undef __FUNCT__
3941 #define __FUNCT__ "MatFileSplit"
3942 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3943 {
3944   PetscErrorCode    ierr;
3945   PetscMPIInt       rank;
3946   PetscInt          m,N,i,rstart,nnz;
3947   size_t            len;
3948   const PetscInt    *indx;
3949   PetscViewer       out;
3950   char              *name;
3951   Mat               B;
3952   const PetscScalar *values;
3953 
3954   PetscFunctionBegin;
3955   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3956   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3957   /* Should this be the type of the diagonal block of A? */
3958   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3959   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3960   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3961   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3962   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3963   for (i=0;i<m;i++) {
3964     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3965     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3966     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3967   }
3968   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3969   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3970 
3971   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3972   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3973   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3974   sprintf(name,"%s.%d",outfile,rank);
3975   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3976   ierr = PetscFree(name);CHKERRQ(ierr);
3977   ierr = MatView(B,out);CHKERRQ(ierr);
3978   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3979   ierr = MatDestroy(B);CHKERRQ(ierr);
3980   PetscFunctionReturn(0);
3981 }
3982 
3983 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3984 #undef __FUNCT__
3985 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3986 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3987 {
3988   PetscErrorCode       ierr;
3989   Mat_Merge_SeqsToMPI  *merge;
3990   PetscContainer       container;
3991 
3992   PetscFunctionBegin;
3993   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3994   if (container) {
3995     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4006     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
4007 
4008     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4009     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4010     ierr = PetscFree(merge);CHKERRQ(ierr); /* free merge only when it was retrieved from the container */
4011   }
4012 
4013   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4014   PetscFunctionReturn(0);
4015 }
4016 
4017 #include "../src/mat/utils/freespace.h"
4018 #include "petscbt.h"
4019 
4020 #undef __FUNCT__
4021 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4022 /*@C
4023       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4024                  matrices from each processor
4025 
4026     Collective on MPI_Comm
4027 
4028    Input Parameters:
4029 +    comm - the communicator the parallel matrix will live on
4030 .    seqmat - the input sequential matrices
4031 .    m - number of local rows (or PETSC_DECIDE)
4032 .    n - number of local columns (or PETSC_DECIDE)
4033 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4034 
4035    Output Parameter:
4036 .    mpimat - the parallel matrix generated
4037 
4038     Level: advanced
4039 
4040    Notes:
4041      The dimensions of the sequential matrix in each processor MUST be the same.
4042      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and will be
4043      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4044 @*/
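/*
   Usage sketch (an illustrative, hedged example; not part of the original source):
   each process contributes its local SeqAIJ matrix seqmat and the result is an
   assembled MPIAIJ matrix; once the nonzero pattern is fixed, the merge can be
   repeated with MAT_REUSE_MATRIX after only the numerical values of seqmat change.

      Mat C;
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ...  update the values of seqmat, keeping its nonzero pattern  ...
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/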
4045 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4046 {
4047   PetscErrorCode       ierr;
4048   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4049   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4050   PetscMPIInt          size,rank,taga,*len_s;
4051   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4052   PetscInt             proc,m;
4053   PetscInt             **buf_ri,**buf_rj;
4054   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4055   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4056   MPI_Request          *s_waits,*r_waits;
4057   MPI_Status           *status;
4058   MatScalar            *aa=a->a;
4059   MatScalar            **abuf_r,*ba_i;
4060   Mat_Merge_SeqsToMPI  *merge;
4061   PetscContainer       container;
4062 
4063   PetscFunctionBegin;
4064   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4065 
4066   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4067   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4068 
4069   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4070   if (container) {
4071     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4072   }
4073   bi     = merge->bi;
4074   bj     = merge->bj;
4075   buf_ri = merge->buf_ri;
4076   buf_rj = merge->buf_rj;
4077 
4078   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4079   owners = merge->rowmap.range;
4080   len_s  = merge->len_s;
4081 
4082   /* send and recv matrix values */
4083   /*-----------------------------*/
4084   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4085   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4086 
4087   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4088   for (proc=0,k=0; proc<size; proc++){
4089     if (!len_s[proc]) continue;
4090     i = owners[proc];
4091     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4092     k++;
4093   }
4094 
4095   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4096   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4097   ierr = PetscFree(status);CHKERRQ(ierr);
4098 
4099   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4100   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4101 
4102   /* insert mat values of mpimat */
4103   /*----------------------------*/
4104   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4105   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4106   nextrow = buf_ri_k + merge->nrecv;
4107   nextai  = nextrow + merge->nrecv;
4108 
4109   for (k=0; k<merge->nrecv; k++){
4110     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4111     nrows = *(buf_ri_k[k]);
4112     nextrow[k]  = buf_ri_k[k]+1;              /* next row number in the k-th received i-structure */
4113     nextai[k]   = buf_ri_k[k] + (nrows + 1);  /* points to the offsets of the k-th received i-structure */
4114   }
4115 
4116   /* set values of ba */
4117   m = merge->rowmap.n;
4118   for (i=0; i<m; i++) {
4119     arow = owners[rank] + i;
4120     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4121     bnzi = bi[i+1] - bi[i];
4122     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4123 
4124     /* add local non-zero vals of this proc's seqmat into ba */
4125     anzi = ai[arow+1] - ai[arow];
4126     aj   = a->j + ai[arow];
4127     aa   = a->a + ai[arow];
4128     nextaj = 0;
4129     for (j=0; nextaj<anzi; j++){
4130       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4131         ba_i[j] += aa[nextaj++];
4132       }
4133     }
4134 
4135     /* add received vals into ba */
4136     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4137       /* i-th row */
4138       if (i == *nextrow[k]) {
4139         anzi = *(nextai[k]+1) - *nextai[k];
4140         aj   = buf_rj[k] + *(nextai[k]);
4141         aa   = abuf_r[k] + *(nextai[k]);
4142         nextaj = 0;
4143         for (j=0; nextaj<anzi; j++){
4144           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4145             ba_i[j] += aa[nextaj++];
4146           }
4147         }
4148         nextrow[k]++; nextai[k]++;
4149       }
4150     }
4151     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4152   }
4153   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4154   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4155 
4156   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4157   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4158   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4159   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4160   PetscFunctionReturn(0);
4161 }
4162 
4163 #undef __FUNCT__
4164 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4165 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4166 {
4167   PetscErrorCode       ierr;
4168   Mat                  B_mpi;
4169   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4170   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4171   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4172   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4173   PetscInt             len,proc,*dnz,*onz;
4174   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4175   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4176   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4177   MPI_Status           *status;
4178   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4179   PetscBT              lnkbt;
4180   Mat_Merge_SeqsToMPI  *merge;
4181   PetscContainer       container;
4182 
4183   PetscFunctionBegin;
4184   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4185 
4186   /* make sure it is a PETSc comm */
4187   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4188   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4189   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4190 
4191   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4192   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4193 
4194   /* determine row ownership */
4195   /*---------------------------------------------------------*/
4196   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4197   merge->rowmap.n = m;
4198   merge->rowmap.N = M;
4199   merge->rowmap.bs = 1;
4200   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4201   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4202   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4203 
4204   m      = merge->rowmap.n;
4205   M      = merge->rowmap.N;
4206   owners = merge->rowmap.range;
4207 
4208   /* determine the number of messages to send, their lengths */
4209   /*---------------------------------------------------------*/
4210   len_s  = merge->len_s;
4211 
4212   len = 0;  /* length of buf_si[] */
4213   merge->nsend = 0;
4214   for (proc=0; proc<size; proc++){
4215     len_si[proc] = 0;
4216     if (proc == rank){
4217       len_s[proc] = 0;
4218     } else {
4219       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4220       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4221     }
4222     if (len_s[proc]) {
4223       merge->nsend++;
4224       nrows = 0;
4225       for (i=owners[proc]; i<owners[proc+1]; i++){
4226         if (ai[i+1] > ai[i]) nrows++;
4227       }
4228       len_si[proc] = 2*(nrows+1);
4229       len += len_si[proc];
4230     }
4231   }
4232 
4233   /* determine the number and length of messages to receive for ij-structure */
4234   /*-------------------------------------------------------------------------*/
4235   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4236   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4237 
4238   /* post the Irecv of j-structure */
4239   /*-------------------------------*/
4240   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4241   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4242 
4243   /* post the Isend of j-structure */
4244   /*--------------------------------*/
4245   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4246   sj_waits = si_waits + merge->nsend;
4247 
4248   for (proc=0, k=0; proc<size; proc++){
4249     if (!len_s[proc]) continue;
4250     i = owners[proc];
4251     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4252     k++;
4253   }
4254 
4255   /* receives and sends of j-structure are complete */
4256   /*------------------------------------------------*/
4257   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4258   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4259 
4260   /* send and recv i-structure */
4261   /*---------------------------*/
4262   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4263   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4264 
4265   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4266   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4267   for (proc=0,k=0; proc<size; proc++){
4268     if (!len_s[proc]) continue;
4269     /* form outgoing message for i-structure:
4270          buf_si[0]:                 nrows to be sent
4271                [1:nrows]:           row index (local to the receiving process)
4272                [nrows+1:2*nrows+1]: i-structure index (running offsets, starting at 0)
4273     */
4274     /*-------------------------------------------*/
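    /* illustrative layout (a hypothetical example, not from the original code): sending two
       rows that are local rows 2 and 5 on the receiving process, with 3 and 4 nonzeros,
       gives buf_si = [2, 2, 5, 0, 3, 7]: nrows, the two row indices, then the offsets */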
4275     nrows = len_si[proc]/2 - 1;
4276     buf_si_i    = buf_si + nrows+1;
4277     buf_si[0]   = nrows;
4278     buf_si_i[0] = 0;
4279     nrows = 0;
4280     for (i=owners[proc]; i<owners[proc+1]; i++){
4281       anzi = ai[i+1] - ai[i];
4282       if (anzi) {
4283         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4284         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4285         nrows++;
4286       }
4287     }
4288     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4289     k++;
4290     buf_si += len_si[proc];
4291   }
4292 
4293   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4294   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4295 
4296   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4297   for (i=0; i<merge->nrecv; i++){
4298     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4299   }
4300 
4301   ierr = PetscFree(len_si);CHKERRQ(ierr);
4302   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4303   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4304   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4305   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4306   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4307   ierr = PetscFree(status);CHKERRQ(ierr);
4308 
4309   /* compute a local seq matrix in each processor */
4310   /*----------------------------------------------*/
4311   /* allocate bi array and free space for accumulating nonzero column info */
4312   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4313   bi[0] = 0;
4314 
4315   /* create and initialize a linked list */
4316   nlnk = N+1;
4317   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4318 
4319   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4320   len = 0;
4321   len  = ai[owners[rank+1]] - ai[owners[rank]];
4322   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4323   current_space = free_space;
4324 
4325   /* determine symbolic info for each local row */
4326   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4327   nextrow = buf_ri_k + merge->nrecv;
4328   nextai  = nextrow + merge->nrecv;
4329   for (k=0; k<merge->nrecv; k++){
4330     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4331     nrows = *buf_ri_k[k];
4332     nextrow[k]  = buf_ri_k[k] + 1;            /* next row number in the k-th received i-structure */
4333     nextai[k]   = buf_ri_k[k] + (nrows + 1);  /* points to the offsets of the k-th received i-structure */
4334   }
4335 
4336   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4337   len = 0;
4338   for (i=0;i<m;i++) {
4339     bnzi   = 0;
4340     /* add local non-zero cols of this proc's seqmat into lnk */
4341     arow   = owners[rank] + i;
4342     anzi   = ai[arow+1] - ai[arow];
4343     aj     = a->j + ai[arow];
4344     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4345     bnzi += nlnk;
4346     /* add received col data into lnk */
4347     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4348       if (i == *nextrow[k]) { /* i-th row */
4349         anzi = *(nextai[k]+1) - *nextai[k];
4350         aj   = buf_rj[k] + *nextai[k];
4351         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4352         bnzi += nlnk;
4353         nextrow[k]++; nextai[k]++;
4354       }
4355     }
4356     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4357 
4358     /* if free space is not available, make more free space */
4359     if (current_space->local_remaining<bnzi) {
4360       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4361       nspacedouble++;
4362     }
4363     /* copy data into free space, then initialize lnk */
4364     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4365     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4366 
4367     current_space->array           += bnzi;
4368     current_space->local_used      += bnzi;
4369     current_space->local_remaining -= bnzi;
4370 
4371     bi[i+1] = bi[i] + bnzi;
4372   }
4373 
4374   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4375 
4376   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4377   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4378   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4379 
4380   /* create symbolic parallel matrix B_mpi */
4381   /*---------------------------------------*/
4382   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4383   if (n==PETSC_DECIDE) {
4384     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4385   } else {
4386     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4387   }
4388   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4389   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4390   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4391 
4392   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4393   B_mpi->assembled     = PETSC_FALSE;
4394   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4395   merge->bi            = bi;
4396   merge->bj            = bj;
4397   merge->buf_ri        = buf_ri;
4398   merge->buf_rj        = buf_rj;
4399   merge->coi           = PETSC_NULL;
4400   merge->coj           = PETSC_NULL;
4401   merge->owners_co     = PETSC_NULL;
4402 
4403   /* attach the supporting struct to B_mpi for reuse */
4404   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4405   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4406   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4407   *mpimat = B_mpi;
4408 
4409   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4410   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4411   PetscFunctionReturn(0);
4412 }
4413 
4414 #undef __FUNCT__
4415 #define __FUNCT__ "MatMerge_SeqsToMPI"
4416 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4417 {
4418   PetscErrorCode   ierr;
4419 
4420   PetscFunctionBegin;
4421   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4422   if (scall == MAT_INITIAL_MATRIX){
4423     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4424   }
4425   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4426   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4427   PetscFunctionReturn(0);
4428 }
4429 
4430 #undef __FUNCT__
4431 #define __FUNCT__ "MatGetLocalMat"
4432 /*@
4433      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4434 
4435     Not Collective
4436 
4437    Input Parameters:
4438 +    A - the matrix
4439 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4440 
4441    Output Parameter:
4442 .    A_loc - the local sequential matrix generated
4443 
4444     Level: developer
4445 
4446 @*/
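/*
   Usage sketch (hedged example, not part of the original source): gather the locally
   owned rows of an MPIAIJ matrix into a sequential matrix, then refresh it in place
   after the parallel matrix's numerical values change.

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ...  values of A change, same nonzero pattern  ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/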
4447 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4448 {
4449   PetscErrorCode  ierr;
4450   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4451   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4452   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4453   MatScalar       *aa=a->a,*ba=b->a,*cam;
4454   PetscScalar     *ca;
4455   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4456   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4457 
4458   PetscFunctionBegin;
4459   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4460   if (scall == MAT_INITIAL_MATRIX){
4461     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4462     ci[0] = 0;
4463     for (i=0; i<am; i++){
4464       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4465     }
4466     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4467     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4468     k = 0;
4469     for (i=0; i<am; i++) {
4470       ncols_o = bi[i+1] - bi[i];
4471       ncols_d = ai[i+1] - ai[i];
4472       /* off-diagonal portion of A */
4473       for (jo=0; jo<ncols_o; jo++) {
4474         col = cmap[*bj];
4475         if (col >= cstart) break;
4476         cj[k]   = col; bj++;
4477         ca[k++] = *ba++;
4478       }
4479       /* diagonal portion of A */
4480       for (j=0; j<ncols_d; j++) {
4481         cj[k]   = cstart + *aj++;
4482         ca[k++] = *aa++;
4483       }
4484       /* off-diagonal portion of A */
4485       for (j=jo; j<ncols_o; j++) {
4486         cj[k]   = cmap[*bj++];
4487         ca[k++] = *ba++;
4488       }
4489     }
4490     /* put together the new matrix */
4491     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4492     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4493     /* Since these are PETSc arrays, change flags to free them as necessary. */
4494     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4495     mat->free_a  = PETSC_TRUE;
4496     mat->free_ij = PETSC_TRUE;
4497     mat->nonew   = 0;
4498   } else if (scall == MAT_REUSE_MATRIX){
4499     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4500     ci = mat->i; cj = mat->j; cam = mat->a;
4501     for (i=0; i<am; i++) {
4502       /* off-diagonal portion of A */
4503       ncols_o = bi[i+1] - bi[i];
4504       for (jo=0; jo<ncols_o; jo++) {
4505         col = cmap[*bj];
4506         if (col >= cstart) break;
4507         *cam++ = *ba++; bj++;
4508       }
4509       /* diagonal portion of A */
4510       ncols_d = ai[i+1] - ai[i];
4511       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4512       /* off-diagonal portion of A */
4513       for (j=jo; j<ncols_o; j++) {
4514         *cam++ = *ba++; bj++;
4515       }
4516     }
4517   } else {
4518     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4519   }
4520 
4521   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4522   PetscFunctionReturn(0);
4523 }
4524 
4525 #undef __FUNCT__
4526 #define __FUNCT__ "MatGetLocalMatCondensed"
4527 /*@C
4528      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4529 
4530     Not Collective
4531 
4532    Input Parameters:
4533 +    A - the matrix
4534 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4535 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4536 
4537    Output Parameter:
4538 .    A_loc - the local sequential matrix generated
4539 
4540     Level: developer
4541 
4542 @*/
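/*
   Usage sketch (hedged example): passing PETSC_NULL for row and col lets the routine
   build the index sets itself (locally owned rows, and the nonzero columns of the
   local part); those internally created index sets are destroyed before returning.

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/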
4543 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4544 {
4545   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4546   PetscErrorCode    ierr;
4547   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4548   IS                isrowa,iscola;
4549   Mat               *aloc;
4550 
4551   PetscFunctionBegin;
4552   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4553   if (!row){
4554     start = A->rmap->rstart; end = A->rmap->rend;
4555     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4556   } else {
4557     isrowa = *row;
4558   }
4559   if (!col){
4560     start = A->cmap->rstart;
4561     cmap  = a->garray;
4562     nzA   = a->A->cmap->n;
4563     nzB   = a->B->cmap->n;
4564     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4565     ncols = 0;
4566     for (i=0; i<nzB; i++) {
4567       if (cmap[i] < start) idx[ncols++] = cmap[i];
4568       else break;
4569     }
4570     imark = i;
4571     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4572     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4573     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4574     ierr = PetscFree(idx);CHKERRQ(ierr);
4575   } else {
4576     iscola = *col;
4577   }
4578   if (scall != MAT_INITIAL_MATRIX){
4579     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4580     aloc[0] = *A_loc;
4581   }
4582   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4583   *A_loc = aloc[0];
4584   ierr = PetscFree(aloc);CHKERRQ(ierr);
4585   if (!row){
4586     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4587   }
4588   if (!col){
4589     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4590   }
4591   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4592   PetscFunctionReturn(0);
4593 }
4594 
4595 #undef __FUNCT__
4596 #define __FUNCT__ "MatGetBrowsOfAcols"
4597 /*@C
4598     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4599 
4600     Collective on Mat
4601 
4602    Input Parameters:
4603 +    A,B - the matrices in mpiaij format
4604 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4605 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4606 
4607    Output Parameter:
4608 +    rowb, colb - index sets of rows and columns of B to extract
4609 .    brstart - row index of B_seq at which the next B->rmap->n rows are taken from B's local rows
4610 -    B_seq - the sequential matrix generated
4611 
4612     Level: developer
4613 
4614 @*/
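/*
   Usage sketch (hedged example): on the first call the row/column index sets of B are
   created and returned through rowb and colb, so a later call with MAT_REUSE_MATRIX
   can reuse both the index sets and B_seq.

      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/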
4615 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4616 {
4617   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4618   PetscErrorCode    ierr;
4619   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4620   IS                isrowb,iscolb;
4621   Mat               *bseq;
4622 
4623   PetscFunctionBegin;
4624   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4625     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4626   }
4627   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4628 
4629   if (scall == MAT_INITIAL_MATRIX){
4630     start = A->cmap->rstart;
4631     cmap  = a->garray;
4632     nzA   = a->A->cmap->n;
4633     nzB   = a->B->cmap->n;
4634     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4635     ncols = 0;
4636     for (i=0; i<nzB; i++) {  /* row < local row index */
4637       if (cmap[i] < start) idx[ncols++] = cmap[i];
4638       else break;
4639     }
4640     imark = i;
4641     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4642     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4643     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4644     ierr = PetscFree(idx);CHKERRQ(ierr);
4645     *brstart = imark;
4646     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4647   } else {
4648     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4649     isrowb = *rowb; iscolb = *colb;
4650     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4651     bseq[0] = *B_seq;
4652   }
4653   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4654   *B_seq = bseq[0];
4655   ierr = PetscFree(bseq);CHKERRQ(ierr);
4656   if (!rowb){
4657     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4658   } else {
4659     *rowb = isrowb;
4660   }
4661   if (!colb){
4662     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4663   } else {
4664     *colb = iscolb;
4665   }
4666   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4667   PetscFunctionReturn(0);
4668 }
4669 
4670 #undef __FUNCT__
4671 #define __FUNCT__ "MatGetBrowsOfAoCols"
4672 /*@C
4673     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4674     of the OFF-DIAGONAL portion of the local part of A
4675 
4676     Collective on Mat
4677 
4678    Input Parameters:
4679 +    A,B - the matrices in mpiaij format
4680 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4681 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4682 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4683 
4684    Output Parameter:
4685 .    B_oth - the sequential matrix generated
4686 
4687     Level: developer
4688 
4689 @*/
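/*
   Usage sketch (hedged example): startsj and bufa returned by the first call record the
   communication layout, so the MAT_REUSE_MATRIX call only re-sends the numerical values.

      PetscInt  *startsj = PETSC_NULL;
      MatScalar *bufa    = PETSC_NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/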
4690 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4691 {
4692   VecScatter_MPI_General *gen_to,*gen_from;
4693   PetscErrorCode         ierr;
4694   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4695   Mat_SeqAIJ             *b_oth;
4696   VecScatter             ctx=a->Mvctx;
4697   MPI_Comm               comm=((PetscObject)ctx)->comm;
4698   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4699   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4700   PetscScalar            *rvalues,*svalues;
4701   MatScalar              *b_otha,*bufa,*bufA;
4702   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4703   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4704   MPI_Status             *sstatus,rstatus;
4705   PetscMPIInt            jj;
4706   PetscInt               *cols,sbs,rbs;
4707   PetscScalar            *vals;
4708 
4709   PetscFunctionBegin;
4710   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4711     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4712   }
4713   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4714   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4715 
4716   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4717   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4718   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
4719   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
4720   nrecvs   = gen_from->n;
4721   nsends   = gen_to->n;
4722 
4723   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4724   srow     = gen_to->indices;   /* local row index to be sent */
4725   sstarts  = gen_to->starts;
4726   sprocs   = gen_to->procs;
4727   sstatus  = gen_to->sstatus;
4728   sbs      = gen_to->bs;
4729   rstarts  = gen_from->starts;
4730   rprocs   = gen_from->procs;
4731   rbs      = gen_from->bs;
4732 
4733   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4734   if (scall == MAT_INITIAL_MATRIX){
4735     /* i-array */
4736     /*---------*/
4737     /*  post receives */
4738     for (i=0; i<nrecvs; i++){
4739       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4740       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4741       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4742     }
4743 
4744     /* pack the outgoing message */
4745     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4746     rstartsj = sstartsj + nsends +1;
4747     sstartsj[0] = 0;  rstartsj[0] = 0;
4748     len = 0; /* total length of j or a array to be sent */
4749     k = 0;
4750     for (i=0; i<nsends; i++){
4751       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4752       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4753       for (j=0; j<nrows; j++) {
4754         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4755         for (l=0; l<sbs; l++){
4756           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4757           rowlen[j*sbs+l] = ncols;
4758           len += ncols;
4759           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4760         }
4761         k++;
4762       }
4763       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4764       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4765     }
4766     /* recvs and sends of i-array are completed */
4767     i = nrecvs;
4768     while (i--) {
4769       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4770     }
4771     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4772 
4773     /* allocate buffers for sending j and a arrays */
4774     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4775     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4776 
4777     /* create i-array of B_oth */
4778     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4779     b_othi[0] = 0;
4780     len = 0; /* total length of j or a array to be received */
4781     k = 0;
4782     for (i=0; i<nrecvs; i++){
4783       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4784       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4785       for (j=0; j<nrows; j++) {
4786         b_othi[k+1] = b_othi[k] + rowlen[j];
4787         len += rowlen[j]; k++;
4788       }
4789       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4790     }
4791 
4792     /* allocate space for the j and a arrays of B_oth */
4793     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4794     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4795 
4796     /* j-array */
4797     /*---------*/
4798     /*  post receives of j-array */
4799     for (i=0; i<nrecvs; i++){
4800       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4801       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4802     }
4803 
4804     /* pack the outgoing message j-array */
4805     k = 0;
4806     for (i=0; i<nsends; i++){
4807       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4808       bufJ = bufj+sstartsj[i];
4809       for (j=0; j<nrows; j++) {
4810         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4811         for (ll=0; ll<sbs; ll++){
4812           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4813           for (l=0; l<ncols; l++){
4814             *bufJ++ = cols[l];
4815           }
4816           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4817         }
4818       }
4819       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4820     }
4821 
4822     /* recvs and sends of j-array are completed */
4823     i = nrecvs;
4824     while (i--) {
4825       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4826     }
4827     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4828   } else if (scall == MAT_REUSE_MATRIX){
4829     sstartsj = *startsj;
4830     rstartsj = sstartsj + nsends +1;
4831     bufa     = *bufa_ptr;
4832     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4833     b_otha   = b_oth->a;
4834   } else {
4835     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4836   }
4837 
4838   /* a-array */
4839   /*---------*/
4840   /*  post receives of a-array */
4841   for (i=0; i<nrecvs; i++){
4842     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4843     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4844   }
4845 
4846   /* pack the outgoing message a-array */
4847   k = 0;
4848   for (i=0; i<nsends; i++){
4849     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4850     bufA = bufa+sstartsj[i];
4851     for (j=0; j<nrows; j++) {
4852       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4853       for (ll=0; ll<sbs; ll++){
4854         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4855         for (l=0; l<ncols; l++){
4856           *bufA++ = vals[l];
4857         }
4858         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4859       }
4860     }
4861     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4862   }
4863   /* recvs and sends of a-array are completed */
4864   i = nrecvs;
4865   while (i--) {
4866     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4867   }
4868   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4869   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4870 
4871   if (scall == MAT_INITIAL_MATRIX){
4872     /* put together the new matrix */
4873     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4874 
4875     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4876     /* Since these are PETSc arrays, change flags to free them as necessary. */
4877     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4878     b_oth->free_a  = PETSC_TRUE;
4879     b_oth->free_ij = PETSC_TRUE;
4880     b_oth->nonew   = 0;
4881 
4882     ierr = PetscFree(bufj);CHKERRQ(ierr);
4883     if (!startsj || !bufa_ptr){
4884       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4885       ierr = PetscFree(bufa);CHKERRQ(ierr);
4886     } else {
4887       *startsj  = sstartsj;
4888       *bufa_ptr = bufa;
4889     }
4890   }
4891   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4892   PetscFunctionReturn(0);
4893 }
4894 
4895 #undef __FUNCT__
4896 #define __FUNCT__ "MatGetCommunicationStructs"
4897 /*@C
4898   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4899 
4900   Not Collective
4901 
4902   Input Parameters:
4903 . A - The matrix in mpiaij format
4904 
4905   Output Parameter:
4906 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4907 . colmap - A map from global column index to local index into lvec
4908 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4909 
4910   Level: developer
4911 
4912 @*/
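/*
   Usage sketch (hedged example): the returned objects are owned by the matrix and must
   not be destroyed by the caller; the type of colmap depends on PETSC_USE_CTABLE.

      Vec        lvec;
      PetscInt   *colmap;      (declare PetscTable colmap when PETSC_USE_CTABLE is defined)
      VecScatter Mvctx;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/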
4913 #if defined (PETSC_USE_CTABLE)
4914 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4915 #else
4916 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4917 #endif
4918 {
4919   Mat_MPIAIJ *a;
4920 
4921   PetscFunctionBegin;
4922   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4923   PetscValidPointer(lvec, 2);
4924   PetscValidPointer(colmap, 3);
4925   PetscValidPointer(multScatter, 4);
4926   a = (Mat_MPIAIJ *) A->data;
4927   if (lvec) *lvec = a->lvec;
4928   if (colmap) *colmap = a->colmap;
4929   if (multScatter) *multScatter = a->Mvctx;
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 EXTERN_C_BEGIN
4934 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4935 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4936 EXTERN_C_END
4937 
4938 #include "../src/mat/impls/dense/mpi/mpidense.h"
4939 
4940 #undef __FUNCT__
4941 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4942 /*
4943     Computes (B'*A')' since computing A*B directly is untenable
4944 
4945                n                       p                          p
4946         (              )       (              )         (                  )
4947       m (      A       )  *  n (       B      )   =   m (         C        )
4948         (              )       (              )         (                  )
4949 
4950 */
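/*
   Restating the transpose identity used below:  C = A*B = ((A*B)')' = (B'*A')',
   so Ct = Bt*At is formed first and then transposed back into C.
*/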
4951 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4952 {
4953   PetscErrorCode     ierr;
4954   Mat                At,Bt,Ct;
4955 
4956   PetscFunctionBegin;
4957   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4958   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4959   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4960   ierr = MatDestroy(At);CHKERRQ(ierr);
4961   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4962   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4963   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4964   PetscFunctionReturn(0);
4965 }
4966 
4967 #undef __FUNCT__
4968 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4969 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4970 {
4971   PetscErrorCode ierr;
4972   PetscInt       m=A->rmap->n,n=B->cmap->n;
4973   Mat            Cmat;
4974 
4975   PetscFunctionBegin;
4976   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4977   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4978   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4979   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4980   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4981   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4982   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4983   *C   = Cmat;
4984   PetscFunctionReturn(0);
4985 }
4986 
4987 /* ----------------------------------------------------------------*/
4988 #undef __FUNCT__
4989 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4990 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4991 {
4992   PetscErrorCode ierr;
4993 
4994   PetscFunctionBegin;
4995   if (scall == MAT_INITIAL_MATRIX){
4996     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4997   }
4998   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4999   PetscFunctionReturn(0);
5000 }
5001 
5002 EXTERN_C_BEGIN
5003 #if defined(PETSC_HAVE_MUMPS)
5004 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5005 #endif
5006 #if defined(PETSC_HAVE_PASTIX)
5007 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5008 #endif
5009 #if defined(PETSC_HAVE_SUPERLU_DIST)
5010 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5011 #endif
5012 #if defined(PETSC_HAVE_SPOOLES)
5013 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5014 #endif
5015 EXTERN_C_END
5016 
5017 /*MC
5018    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5019 
5020    Options Database Keys:
5021 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5022 
5023   Level: beginner
5024 
5025 .seealso: MatCreateMPIAIJ()
5026 M*/
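/*
   A typical creation sequence (a hedged sketch, not from the original source) that lets
   the type be selected at runtime with -mat_type mpiaij:

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/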
5027 
5028 EXTERN_C_BEGIN
5029 #undef __FUNCT__
5030 #define __FUNCT__ "MatCreate_MPIAIJ"
5031 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5032 {
5033   Mat_MPIAIJ     *b;
5034   PetscErrorCode ierr;
5035   PetscMPIInt    size;
5036 
5037   PetscFunctionBegin;
5038   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5039 
5040   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5041   B->data         = (void*)b;
5042   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5043   B->rmap->bs      = 1;
5044   B->assembled    = PETSC_FALSE;
5045   B->mapping      = 0;
5046 
5047   B->insertmode      = NOT_SET_VALUES;
5048   b->size            = size;
5049   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5050 
5051   /* build cache for off array entries formed */
5052   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5053   b->donotstash  = PETSC_FALSE;
5054   b->colmap      = 0;
5055   b->garray      = 0;
5056   b->roworiented = PETSC_TRUE;
5057 
5058   /* stuff used for matrix vector multiply */
5059   b->lvec      = PETSC_NULL;
5060   b->Mvctx     = PETSC_NULL;
5061 
5062   /* stuff for MatGetRow() */
5063   b->rowindices   = 0;
5064   b->rowvalues    = 0;
5065   b->getrowactive = PETSC_FALSE;
5066 
5067 #if defined(PETSC_HAVE_SPOOLES)
5068   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
5069                                      "MatGetFactor_mpiaij_spooles",
5070                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5071 #endif
5072 #if defined(PETSC_HAVE_MUMPS)
5073   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
5074                                      "MatGetFactor_mpiaij_mumps",
5075                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5076 #endif
5077 #if defined(PETSC_HAVE_PASTIX)
5078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
5079 					   "MatGetFactor_mpiaij_pastix",
5080 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5081 #endif
5082 #if defined(PETSC_HAVE_SUPERLU_DIST)
5083   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5084                                      "MatGetFactor_mpiaij_superlu_dist",
5085                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5086 #endif
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5088                                      "MatStoreValues_MPIAIJ",
5089                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5091                                      "MatRetrieveValues_MPIAIJ",
5092                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5094 				     "MatGetDiagonalBlock_MPIAIJ",
5095                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5097 				     "MatIsTranspose_MPIAIJ",
5098 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5100 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5101 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5103 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5104 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5106 				     "MatDiagonalScaleLocal_MPIAIJ",
5107 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5109                                      "MatConvert_MPIAIJ_MPICSRPERM",
5110                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5112                                      "MatConvert_MPIAIJ_MPICRL",
5113                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5115                                      "MatMatMult_MPIDense_MPIAIJ",
5116                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5117   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5118                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5119                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5120   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5121                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5122                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5123   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5124   PetscFunctionReturn(0);
5125 }
5126 EXTERN_C_END
5127 
5128 #undef __FUNCT__
5129 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5130 /*@
5131      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5132          and "off-diagonal" part of the matrix in CSR format.
5133 
5134    Collective on MPI_Comm
5135 
5136    Input Parameters:
5137 +  comm - MPI communicator
5138 .  m - number of local rows (Cannot be PETSC_DECIDE)
5139 .  n - This value should be the same as the local size used in creating the
5140        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5141        calculated if N is given) For square matrices n is almost always m.
5142 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5143 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5144 .   i - row indices for "diagonal" portion of matrix
5145 .   j - column indices for "diagonal" portion of matrix
5146 .   a - matrix values for "diagonal" portion of matrix
5147 .   oi - row indices for "off-diagonal" portion of matrix
5148 .   oj - column indices for "off-diagonal" portion of matrix
5149 -   oa - matrix values for "off-diagonal" portion of matrix
5150 
5151    Output Parameter:
5152 .   mat - the matrix
5153 
5154    Level: advanced
5155 
5156    Notes:
5157        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5158 
5159        The i and j indices are 0 based
5160 
5161        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5162 
5163        This sets local rows and cannot be used to set off-processor values.
5164 
5165        You cannot later use MatSetValues() to change values in this matrix.
5166 
5167 .keywords: matrix, aij, compressed row, sparse, parallel
5168 
5169 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5170           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5171 @*/
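/*
   Usage sketch (hedged, with placeholder CSR arrays): each process supplies the CSR
   arrays of its "diagonal" block (i,j,a) and its "off-diagonal" block (oi,oj,oa);
   the arrays must outlive the matrix since they are not copied.

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/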
5172 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5173 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5174 {
5175   PetscErrorCode ierr;
5176   Mat_MPIAIJ     *maij;
5177 
5178   PetscFunctionBegin;
5179   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5180   if (i[0]) {
5181     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5182   }
5183   if (oi[0]) {
5184     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5185   }
5186   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5187   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5188   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5189   maij = (Mat_MPIAIJ*) (*mat)->data;
5190   maij->donotstash     = PETSC_TRUE;
5191   (*mat)->preallocated = PETSC_TRUE;
5192 
5193   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5194   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5195   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5196   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5197 
5198   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5199   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5200 
5201   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205 
5206   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5207   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5208   PetscFunctionReturn(0);
5209 }
5210 
5211 /*
5212     Special version for direct calls from Fortran
5213 */
5214 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5215 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5216 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5217 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5218 #endif
5219 
5220 /* Change these macros so can be used in void function */
5221 #undef CHKERRQ
5222 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5223 #undef SETERRQ2
5224 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5225 #undef SETERRQ
5226 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5227 
5228 EXTERN_C_BEGIN
5229 #undef __FUNCT__
5230 #define __FUNCT__ "matsetvaluesmpiaij_"
5231 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5232 {
5233   Mat             mat = *mmat;
5234   PetscInt        m = *mm, n = *mn;
5235   InsertMode      addv = *maddv;
5236   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5237   PetscScalar     value;
5238   PetscErrorCode  ierr;
5239 
5240   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5241   if (mat->insertmode == NOT_SET_VALUES) {
5242     mat->insertmode = addv;
5243   }
5244 #if defined(PETSC_USE_DEBUG)
5245   else if (mat->insertmode != addv) {
5246     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5247   }
5248 #endif
5249   {
5250   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5251   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5252   PetscTruth      roworiented = aij->roworiented;
5253 
5254   /* Some Variables required in the macro */
5255   Mat             A = aij->A;
5256   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5257   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5258   MatScalar       *aa = a->a;
5259   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5260   Mat             B = aij->B;
5261   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5262   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5263   MatScalar       *ba = b->a;
5264 
5265   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5266   PetscInt        nonew = a->nonew;
5267   MatScalar       *ap1,*ap2;
5268 
5269   PetscFunctionBegin;
5270   for (i=0; i<m; i++) {
5271     if (im[i] < 0) continue;
5272 #if defined(PETSC_USE_DEBUG)
5273     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5274 #endif
5275     if (im[i] >= rstart && im[i] < rend) {
5276       row      = im[i] - rstart;
5277       lastcol1 = -1;
5278       rp1      = aj + ai[row];
5279       ap1      = aa + ai[row];
5280       rmax1    = aimax[row];
5281       nrow1    = ailen[row];
5282       low1     = 0;
5283       high1    = nrow1;
5284       lastcol2 = -1;
5285       rp2      = bj + bi[row];
5286       ap2      = ba + bi[row];
5287       rmax2    = bimax[row];
5288       nrow2    = bilen[row];
5289       low2     = 0;
5290       high2    = nrow2;
5291 
5292       for (j=0; j<n; j++) {
5293         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5294         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5295         if (in[j] >= cstart && in[j] < cend){
5296           col = in[j] - cstart;
5297           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5298         } else if (in[j] < 0) continue;
5299 #if defined(PETSC_USE_DEBUG)
5300         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5301 #endif
5302         else {
5303           if (mat->was_assembled) {
5304             if (!aij->colmap) {
5305               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5306             }
5307 #if defined (PETSC_USE_CTABLE)
5308             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5309 	    col--;
5310 #else
5311             col = aij->colmap[in[j]] - 1;
5312 #endif
5313             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5314               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5315               col =  in[j];
5316               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5317               B = aij->B;
5318               b = (Mat_SeqAIJ*)B->data;
5319               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5320               rp2      = bj + bi[row];
5321               ap2      = ba + bi[row];
5322               rmax2    = bimax[row];
5323               nrow2    = bilen[row];
5324               low2     = 0;
5325               high2    = nrow2;
5326               bm       = aij->B->rmap->n;
5327               ba = b->a;
5328             }
5329           } else col = in[j];
5330           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5331         }
5332       }
5333     } else {
5334       if (!aij->donotstash) {
5335         if (roworiented) {
5336           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5337         } else {
5338           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5339         }
5340       }
5341     }
5342   }}
5343   PetscFunctionReturnVoid();
5344 }
5345 EXTERN_C_END
5346 
5347