xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 533163c2cd1b19f0f8b5307c6699b5cf12eec670)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix that resides on process 0 across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send the other processes the lengths of their rows */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* count the off-diagonal entries in each row (olens) and those left of the diagonal block (ld) */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* count the off-diagonal entries in each row (olens) and those left of the diagonal block (ld) */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
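    /* ld[i] counts the off-diagonal entries of row i whose global column lies before the
       diagonal block, so the values of each row arrive ordered as
       [left off-diagonal | diagonal block | right off-diagonal] */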
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
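
/*
   Example usage (a sketch only; the names mlocal and pmat are illustrative, not part of the
   library).  Process 0 assembles a SeqAIJ matrix gmat, every process chooses its number of
   local rows mlocal, and each receives its share of the parallel copy:

       ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&pmat);CHKERRQ(ierr);
       ... change the numerical values of gmat on process 0, keeping the same nonzero pattern ...
       ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&pmat);CHKERRQ(ierr);
*/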
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
179 a slightly higher hash table cost; without it, it is not scalable (each process
180 holds an order N integer array) but is fast to access.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
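
/*
   Example (sketch): if garray = {3,7,12} then global column 7 of the off-diagonal part maps
   to local column 1; the stored value is the local index plus one, so a lookup that returns
   zero means the column is not present in the off-diagonal part.
*/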
204 
205 
206 #define CHUNKSIZE   15
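
/*
   The two macros below insert a single (row,col,value) entry into the local diagonal block A
   and the off-diagonal block B respectively: a short binary search followed by a linear scan
   locates the column; an existing entry is added to or overwritten depending on addv; a new
   nonzero is created only when the nonew flag allows it (nonew == 1 silently skips, nonew == -1
   raises an error), growing the storage through MatSeqXAIJReallocateAIJ, which is why CHUNKSIZE
   is defined above.
*/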
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
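  /* v is assumed to hold the complete row in increasing global column order: the
     off-diagonal entries left of the diagonal block, then the diagonal block entries,
     then the off-diagonal entries to the right */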
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
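/*
   Entries whose rows this process owns are inserted directly into the local diagonal block
   (columns inside the ownership range) or the off-diagonal block (all other columns, translated
   through colmap); entries for rows owned by other processes are placed in the stash, unless
   donotstash is set, and are communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/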
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some variables required by the macros */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   if (v) PetscValidScalarPointer(v,6);
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
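/*
   Only values in locally owned rows may be retrieved; off-diagonal columns are located through
   colmap, and a requested column that is not stored in the off-diagonal part yields 0.0.
*/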
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are either in INSERT_VALUES or ADD_VALUES mode */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any process has disassembled; if so we must
517      also disassemble ourselves, in order that we may reassemble. */
518   /*
519      if the nonzero structure of submatrix B cannot change then we know that
520      no process disassembled, so we can skip this step
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
544   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
551 {
552   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
557   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatZeroRows_MPIAIJ"
563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
564 {
565   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
568   PetscInt       i,*owners = A->rmap->range;
569   PetscInt       *nprocs,j,idx,nsends,row;
570   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
571   PetscInt       *rvalues,count,base,slen,*source;
572   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
573   MPI_Comm       comm = ((PetscObject)A)->comm;
574   MPI_Request    *send_waits,*recv_waits;
575   MPI_Status     recv_status,*send_status;
576 #if defined(PETSC_DEBUG)
577   PetscTruth     found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
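  /* nprocs[2*j] counts the rows destined for process j; nprocs[2*j+1] is 1 if at least one
     row goes to process j, so summing the odd entries below gives the number of sends */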
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   /* inform other processors of the number of messages and the max length */
608   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
609 
610   /* post receives:   */
611   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
612   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
613   for (i=0; i<nrecvs; i++) {
614     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
615   }
616 
617   /* do sends:
618       1) starts[i] gives the starting index in svalues for stuff going to
619          the ith processor
620   */
621   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
622   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
623   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
624   starts[0] = 0;
625   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
626   for (i=0; i<N; i++) {
627     svalues[starts[owner[i]]++] = rows[i];
628   }
629 
630   starts[0] = 0;
631   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   count = 0;
633   for (i=0; i<size; i++) {
634     if (nprocs[2*i+1]) {
635       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
636     }
637   }
638   ierr = PetscFree(starts);CHKERRQ(ierr);
639 
640   base = owners[rank];
641 
642   /*  wait on receives */
643   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the received row indices into the local list of rows to zero */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal we use separate
674      code so that MatSetValues() is not called for each diagonal entry, which would
675      allocate new memory with many mallocs and slow things down.
676 
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
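/*
   Computes yy = A*xx by overlapping communication with computation: the scatter of the needed
   ghost values of xx into lvec is started, the diagonal block a->A is applied to the local part
   of xx, the scatter is completed, and the off-diagonal block a->B then adds the contribution
   of the ghost values into yy.
*/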
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually */
776     /* added into yy until the next line */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* values actually were received in the Begin() but we need to call this nop */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
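/*
   Writes the matrix in the PETSc binary format: a four entry header (MAT_FILE_COOKIE, global
   rows, global columns, total number of nonzeros), then the row lengths, then the column
   indices of every row in global numbering, then the numerical values; all data is funneled
   through process 0, which does the actual writing.
*/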
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_COOKIE;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   nzmax = nz; /* process 0 needs as much space as the largest process needs */
982   nzmax = nz; /* )th processor needs space a largest processor needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscTruth        isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscTruth inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscTruth isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscTruth flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSOR_MPIAIJ"
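/*
   Only the processor-local relaxations are supported (SOR_APPLY_UPPER, SOR_LOCAL_FORWARD_SWEEP,
   SOR_LOCAL_BACKWARD_SWEEP, SOR_LOCAL_SYMMETRIC_SWEEP and SOR_EISENSTAT); for the local sweeps
   each outer iteration updates the right-hand side with the off-diagonal block, bb1 = bb - B*x,
   and then sweeps over the local diagonal block A.
*/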
1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscTruth     hasop;
1216 
1217   PetscFunctionBegin;
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if (flag == SOR_APPLY_UPPER) {
1223     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224     PetscFunctionReturn(0);
1225   }
1226 
1227   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1228     if (flag & SOR_ZERO_INITIAL_GUESS) {
1229       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1230       its--;
1231     }
1232 
1233     while (its--) {
1234       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236 
1237       /* update rhs: bb1 = bb - B*x */
1238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1240 
1241       /* local sweep */
1242       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1243     }
1244   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1245     if (flag & SOR_ZERO_INITIAL_GUESS) {
1246       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1247       its--;
1248     }
1249     while (its--) {
1250       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253       /* update rhs: bb1 = bb - B*x */
1254       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1255       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1256 
1257       /* local sweep */
1258       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1259     }
1260   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1261     if (flag & SOR_ZERO_INITIAL_GUESS) {
1262       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1263       its--;
1264     }
1265     while (its--) {
1266       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268 
1269       /* update rhs: bb1 = bb - B*x */
1270       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1271       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1272 
1273       /* local sweep */
1274       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1275     }
1276   }  else if (flag & SOR_EISENSTAT) {
1277     Vec         xx1;
1278 
1279     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1280     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1281 
1282     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     if (!mat->diag) {
1285       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1286       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1287     }
1288     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1289     if (hasop) {
1290       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1291     } else {
1292       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1293     }
1294     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1295 
1296     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1297 
1298     /* local sweep */
1299     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1300     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1301     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1302   } else {
1303     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1304   }
1305 
1306   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
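  /*
     Strategy: gather the row permutation onto every process and invert it, keeping only
     the local slice of the inverse as the target row indices; the column permutation is
     inverted locally. The permuted matrix is then extracted as a submatrix.
  */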
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is so much easier;
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
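  /* accumulate the statistics of the diagonal (A) and off-diagonal (B) blocks, then
     reduce across the communicator with MAX or SUM as requested by flag */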
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SYMMETRIC:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   case MAT_STRUCTURALLY_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_HERMITIAN:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_SYMMETRY_ETERNAL:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   default:
1461     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetRow_MPIAIJ"
1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1469 {
1470   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1471   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1472   PetscErrorCode ierr;
1473   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1474   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1475   PetscInt       *cmap,*idx_p;
1476 
1477   PetscFunctionBegin;
1478   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1479   mat->getrowactive = PETSC_TRUE;
1480 
1481   if (!mat->rowvalues && (idx || v)) {
1482     /*
1483         allocate enough space to hold information from the longest row.
1484     */
1485     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1486     PetscInt   max = 1,tmp;
1487     for (i=0; i<matin->rmap->n; i++) {
1488       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1489       if (max < tmp) { max = tmp; }
1490     }
1491     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1492   }
1493 
1494   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1495   lrow = row - rstart;
1496 
1497   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1498   if (!v)   {pvA = 0; pvB = 0;}
1499   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1500   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1501   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1502   nztot = nzA + nzB;
1503 
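  /*
     Merge the row of the diagonal block A with the row of the off-diagonal block B.
     B stores compressed local column indices that map to global columns through garray;
     the B entries whose global column lies below cstart come first, then the A entries,
     then the remaining B entries, so the merged row stays sorted by global column.
  */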
1504   cmap  = mat->garray;
1505   if (v  || idx) {
1506     if (nztot) {
1507       /* Sort by increasing column numbers, assuming A and B already sorted */
1508       PetscInt imark = -1;
1509       if (v) {
1510         *v = v_p = mat->rowvalues;
1511         for (i=0; i<nzB; i++) {
1512           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1513           else break;
1514         }
1515         imark = i;
1516         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1517         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1518       }
1519       if (idx) {
1520         *idx = idx_p = mat->rowindices;
1521         if (imark > -1) {
1522           for (i=0; i<imark; i++) {
1523             idx_p[i] = cmap[cworkB[i]];
1524           }
1525         } else {
1526           for (i=0; i<nzB; i++) {
1527             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1528             else break;
1529           }
1530           imark = i;
1531         }
1532         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1533         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1534       }
1535     } else {
1536       if (idx) *idx = 0;
1537       if (v)   *v   = 0;
1538     }
1539   }
1540   *nz = nztot;
1541   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1542   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1551 
1552   PetscFunctionBegin;
1553   if (!aij->getrowactive) {
1554     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1555   }
1556   aij->getrowactive = PETSC_FALSE;
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatNorm_MPIAIJ"
1562 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1563 {
1564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1565   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1566   PetscErrorCode ierr;
1567   PetscInt       i,j,cstart = mat->cmap->rstart;
1568   PetscReal      sum = 0.0;
1569   MatScalar      *v;
1570 
1571   PetscFunctionBegin;
1572   if (aij->size == 1) {
1573     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1574   } else {
1575     if (type == NORM_FROBENIUS) {
1576       v = amat->a;
1577       for (i=0; i<amat->nz; i++) {
1578 #if defined(PETSC_USE_COMPLEX)
1579         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1580 #else
1581         sum += (*v)*(*v); v++;
1582 #endif
1583       }
1584       v = bmat->a;
1585       for (i=0; i<bmat->nz; i++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1588 #else
1589         sum += (*v)*(*v); v++;
1590 #endif
1591       }
1592       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1593       *norm = sqrt(*norm);
1594     } else if (type == NORM_1) { /* max column norm */
1595       PetscReal *tmp,*tmp2;
1596       PetscInt  *jj,*garray = aij->garray;
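      /* accumulate the absolute column sums in an array of global length cmap->N, sum the
         contributions of all processes, and take the largest entry as the norm */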
1597       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1599       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1600       *norm = 0.0;
1601       v = amat->a; jj = amat->j;
1602       for (j=0; j<amat->nz; j++) {
1603         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1604       }
1605       v = bmat->a; jj = bmat->j;
1606       for (j=0; j<bmat->nz; j++) {
1607         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1608       }
1609       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1610       for (j=0; j<mat->cmap->N; j++) {
1611         if (tmp2[j] > *norm) *norm = tmp2[j];
1612       }
1613       ierr = PetscFree(tmp);CHKERRQ(ierr);
1614       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1615     } else if (type == NORM_INFINITY) { /* max row norm */
1616       PetscReal ntemp = 0.0;
1617       for (j=0; j<aij->A->rmap->n; j++) {
1618         v = amat->a + amat->i[j];
1619         sum = 0.0;
1620         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1621           sum += PetscAbsScalar(*v); v++;
1622         }
1623         v = bmat->a + bmat->i[j];
1624         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1625           sum += PetscAbsScalar(*v); v++;
1626         }
1627         if (sum > ntemp) ntemp = sum;
1628       }
1629       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1630     } else {
1631       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1632     }
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatTranspose_MPIAIJ"
1639 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1640 {
1641   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1642   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1643   PetscErrorCode ierr;
1644   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1645   PetscInt       cstart=A->cmap->rstart,ncol;
1646   Mat            B;
1647   MatScalar      *array;
1648 
1649   PetscFunctionBegin;
1650   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1651 
1652   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1653   ai = Aloc->i; aj = Aloc->j;
1654   bi = Bloc->i; bj = Bloc->j;
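  /*
     The local rows of the transpose are the local columns of A, so d_nnz is obtained by
     counting how often each local column appears in the diagonal block; the column indices
     of A are shifted to global numbering for MatSetValues() and shifted back afterwards.
  */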
1655   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1656     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1657     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1658     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1659     for (i=0; i<ai[ma]; i++){
1660       d_nnz[aj[i]] ++;
1661       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1662     }
1663 
1664     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1665     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1666     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1667     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1668     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1669   } else {
1670     B = *matout;
1671   }
1672 
1673   /* copy over the A part */
1674   array = Aloc->a;
1675   row = A->rmap->rstart;
1676   for (i=0; i<ma; i++) {
1677     ncol = ai[i+1]-ai[i];
1678     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1679     row++; array += ncol; aj += ncol;
1680   }
1681   aj = Aloc->j;
1682   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
1683 
1684   /* copy over the B part */
1685   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1686   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1687   array = Bloc->a;
1688   row = A->rmap->rstart;
1689   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1690   cols_tmp = cols;
1691   for (i=0; i<mb; i++) {
1692     ncol = bi[i+1]-bi[i];
1693     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1694     row++; array += ncol; cols_tmp += ncol;
1695   }
1696   ierr = PetscFree(cols);CHKERRQ(ierr);
1697 
1698   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1701     *matout = B;
1702   } else {
1703     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1711 {
1712   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1713   Mat            a = aij->A,b = aij->B;
1714   PetscErrorCode ierr;
1715   PetscInt       s1,s2,s3;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1719   if (rr) {
1720     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1721     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1722     /* Overlap communication with computation. */
1723     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1724   }
1725   if (ll) {
1726     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1727     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1728     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1729   }
1730   /* scale the diagonal block */
1731   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1732 
1733   if (rr) {
1734     /* Do a scatter end and then right scale the off-diagonal block */
1735     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1736     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1737   }
1738 
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1745 {
1746   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1751   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1752   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1759 {
1760   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1761   PetscErrorCode ierr;
1762 
1763   PetscFunctionBegin;
1764   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatEqual_MPIAIJ"
1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1771 {
1772   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1773   Mat            a,b,c,d;
1774   PetscTruth     flg;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   a = matA->A; b = matA->B;
1779   c = matB->A; d = matB->B;
1780 
1781   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1782   if (flg) {
1783     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1784   }
1785   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatCopy_MPIAIJ"
1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1792 {
1793   PetscErrorCode ierr;
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1795   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1796 
1797   PetscFunctionBegin;
1798   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1799   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1800     /* because of the column compression in the off-processor part of the matrix a->B,
1801        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1802        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1803        could be provided by first uncompressing the a->B matrices and then copying the
1804        submatrices */
1805     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1808     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1816 {
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #include "petscblaslapack.h"
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatAXPY_MPIAIJ"
1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1828 {
1829   PetscErrorCode ierr;
1830   PetscInt       i;
1831   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1832   PetscBLASInt   bnz,one=1;
1833   Mat_SeqAIJ     *x,*y;
1834 
1835   PetscFunctionBegin;
1836   if (str == SAME_NONZERO_PATTERN) {
1837     PetscScalar alpha = a;
1838     x = (Mat_SeqAIJ *)xx->A->data;
1839     y = (Mat_SeqAIJ *)yy->A->data;
1840     bnz = PetscBLASIntCast(x->nz);
1841     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1842     x = (Mat_SeqAIJ *)xx->B->data;
1843     y = (Mat_SeqAIJ *)yy->B->data;
1844     bnz = PetscBLASIntCast(x->nz);
1845     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1846   } else if (str == SUBSET_NONZERO_PATTERN) {
1847     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1848 
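    /* the compressed column numbering of the off-diagonal blocks of X and Y may differ,
       so build (or reuse) the index map xtoy that sends each nonzero of x into its slot
       in y before adding a*x into y */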
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     if (y->xtoy && y->XtoY != xx->B) {
1852       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1853       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1854     }
1855     if (!y->xtoy) { /* get xtoy */
1856       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1857       y->XtoY = xx->B;
1858       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1859     }
1860     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1861   } else {
1862     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1863   }
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
1914 #include <boost/parallel/mpi/bsp_process_group.hpp>
1915 #include <boost/graph/distributed/ilu_default_graph.hpp>
1916 #include <boost/graph/distributed/ilu_0_block.hpp>
1917 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1918 #include <boost/graph/distributed/petsc/interface.hpp>
1919 #include <boost/multi_array.hpp>
1920 #include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
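  /*
     Build a distributed level graph from the matrix, run the permuted ILU(0) factorization
     on it, and attach the factored graph to the new matrix through a PetscContainer named
     "graph" so that MatSolve_MPIAIJ() can retrieve it later.
  */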
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
1966   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1967   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1968   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
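  /*
     Expose the PETSc vectors as raw arrays, wrap them as Boost property maps indexed by
     the vertices of the stored level graph, and apply the factorization with ilu_set_solve().
  */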
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
  /* return the vector arrays obtained above */
  ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
  ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);

2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2028   PetscInt       nzlocal,nsends,nrecvs;
2029   PetscMPIInt    *send_rank,*recv_rank;
2030   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2031   PetscScalar    *sbuf_a,**rbuf_a;
2032   PetscErrorCode (*MatDestroy)(Mat);
2033 } Mat_Redundant;
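/* The send/receive ranks and the index/value buffers are cached in this struct (attached to
   the redundant matrix in a PetscContainer) so that a MAT_REUSE_MATRIX call only has to
   re-communicate the numerical values. */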
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
2069     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
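  /*
     Each process packs its local rows into sbuf_j (row pointers followed by global column
     indices) and sbuf_a (values) and exchanges them with the processes of the other
     subcommunicators that hold the same subcommunicator rank, so that every subcommunicator
     can assemble a complete copy of the matrix.
  */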
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
2121       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank;
2128     recv_rank = redund->recv_rank;
2129     sbuf_nz   = redund->sbuf_nz;
2130     rbuf_nz   = redund->rbuf_nz;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2143     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2144     np_subcomm = size/nsubcomm;
2145     nleftover  = size - nsubcomm*np_subcomm;
2146     nsends = 0; nrecvs = 0;
2147     for (i=0; i<size; i++){ /* i=rank*/
2148       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2149         send_rank[nsends] = i; nsends++;
2150         recv_rank[nrecvs++] = i;
2151       }
2152     }
2153     if (rank >= size - nleftover){/* this proc is a leftover processor */
2154       i = size-nleftover-1;
2155       j = 0;
2156       while (j < nsubcomm - nleftover){
2157         send_rank[nsends++] = i;
2158         i--; j++;
2159       }
2160     }
2161 
2162     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2163       for (i=0; i<nleftover; i++){
2164         recv_rank[nrecvs++] = size-nleftover+i;
2165       }
2166     }
2167 
2168     /* allocate sbuf_j, sbuf_a */
2169     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2170     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2171     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2172   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* copy mat's local entries into the buffers */
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     rownz_max = 0;
2177     rptr = sbuf_j;
2178     cols = sbuf_j + rend-rstart + 1;
2179     vals = sbuf_a;
2180     rptr[0] = 0;
2181     for (i=0; i<rend-rstart; i++){
2182       row = i + rstart;
2183       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2184       ncols  = nzA + nzB;
2185       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2186       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2187       /* load the column indices for this row into cols */
2188       lwrite = 0;
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) < cstart){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       for (l=0; l<nzA; l++){
2196         vals[lwrite]   = aworkA[l];
2197         cols[lwrite++] = cstart + cworkA[l];
2198       }
2199       for (l=0; l<nzB; l++) {
2200         if ((ctmp = bmap[cworkB[l]]) >= cend){
2201           vals[lwrite]   = aworkB[l];
2202           cols[lwrite++] = ctmp;
2203         }
2204       }
2205       vals += ncols;
2206       cols += ncols;
2207       rptr[i+1] = rptr[i] + ncols;
2208       if (rownz_max < ncols) rownz_max = ncols;
2209     }
2210     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2211   } else { /* only copy matrix values into sbuf_a */
2212     rptr = sbuf_j;
2213     vals = sbuf_a;
2214     rptr[0] = 0;
2215     for (i=0; i<rend-rstart; i++){
2216       row = i + rstart;
2217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2218       ncols  = nzA + nzB;
2219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2221       lwrite = 0;
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2224       }
2225       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2226       for (l=0; l<nzB; l++) {
2227         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2228       }
2229       vals += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231     }
2232   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2233 
2234   /* send nzlocal to others, and recv other's nzlocal */
2235   /*--------------------------------------------------*/
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     s_waits2 = s_waits3 + nsends;
2239     s_waits1 = s_waits2 + nsends;
2240     r_waits1 = s_waits1 + nsends;
2241     r_waits2 = r_waits1 + nrecvs;
2242     r_waits3 = r_waits2 + nrecvs;
2243   } else {
2244     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     r_waits3 = s_waits3 + nsends;
2246   }
2247 
2248   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2249   if (reuse == MAT_INITIAL_MATRIX){
2250     /* get new tags to keep the communication clean */
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2253     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271 
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from other's */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2291       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2312     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->recv_rank = recv_rank;
2388     redund->sbuf_nz = sbuf_nz;
2389     redund->rbuf_nz = rbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
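  /* take the row maxima of |A| and |B| separately and keep the larger of the two for each
     row; indices coming from the off-diagonal block are mapped back to global column
     numbers through garray */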
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->rmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584        MatGetRow_MPIAIJ,
2585        MatRestoreRow_MPIAIJ,
2586        MatMult_MPIAIJ,
2587 /* 4*/ MatMultAdd_MPIAIJ,
2588        MatMultTranspose_MPIAIJ,
2589        MatMultTransposeAdd_MPIAIJ,
2590 #ifdef PETSC_HAVE_PBGL
2591        MatSolve_MPIAIJ,
2592 #else
2593        0,
2594 #endif
2595        0,
2596        0,
2597 /*10*/ 0,
2598        0,
2599        0,
2600        MatSOR_MPIAIJ,
2601        MatTranspose_MPIAIJ,
2602 /*15*/ MatGetInfo_MPIAIJ,
2603        MatEqual_MPIAIJ,
2604        MatGetDiagonal_MPIAIJ,
2605        MatDiagonalScale_MPIAIJ,
2606        MatNorm_MPIAIJ,
2607 /*20*/ MatAssemblyBegin_MPIAIJ,
2608        MatAssemblyEnd_MPIAIJ,
2609        MatSetOption_MPIAIJ,
2610        MatZeroEntries_MPIAIJ,
2611 /*24*/ MatZeroRows_MPIAIJ,
2612        0,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*29*/ MatSetUpPreallocation_MPIAIJ,
2621 #ifdef PETSC_HAVE_PBGL
2622        0,
2623 #else
2624        0,
2625 #endif
2626        0,
2627        0,
2628        0,
2629 /*34*/ MatDuplicate_MPIAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*39*/ MatAXPY_MPIAIJ,
2635        MatGetSubMatrices_MPIAIJ,
2636        MatIncreaseOverlap_MPIAIJ,
2637        MatGetValues_MPIAIJ,
2638        MatCopy_MPIAIJ,
2639 /*44*/ MatGetRowMax_MPIAIJ,
2640        MatScale_MPIAIJ,
2641        0,
2642        0,
2643        0,
2644 /*49*/ MatSetBlockSize_MPIAIJ,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*54*/ MatFDColoringCreate_MPIAIJ,
2650        0,
2651        MatSetUnfactored_MPIAIJ,
2652        MatPermute_MPIAIJ,
2653        0,
2654 /*59*/ MatGetSubMatrix_MPIAIJ,
2655        MatDestroy_MPIAIJ,
2656        MatView_MPIAIJ,
2657        0,
2658        0,
2659 /*64*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2665        MatGetRowMinAbs_MPIAIJ,
2666        0,
2667        MatSetColoring_MPIAIJ,
2668 #if defined(PETSC_HAVE_ADIC)
2669        MatSetValuesAdic_MPIAIJ,
2670 #else
2671        0,
2672 #endif
2673        MatSetValuesAdifor_MPIAIJ,
2674 /*75*/ MatFDColoringApply_AIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*80*/ 0,
2680        0,
2681        0,
2682 /*83*/ MatLoad_MPIAIJ,
2683        0,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2689        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2690        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2691        MatPtAP_Basic,
2692        MatPtAPSymbolic_MPIAIJ,
2693 /*94*/ MatPtAPNumeric_MPIAIJ,
2694        0,
2695        0,
2696        0,
2697        0,
2698 /*99*/ 0,
2699        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2700        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2701        MatConjugate_MPIAIJ,
2702        0,
2703 /*104*/MatSetValuesRow_MPIAIJ,
2704        MatRealPart_MPIAIJ,
2705        MatImaginaryPart_MPIAIJ,
2706        0,
2707        0,
2708 /*109*/0,
2709        MatGetRedundantMatrix_MPIAIJ,
2710        MatGetRowMin_MPIAIJ,
2711        0,
2712        0,
2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718        0
2719 };
2720 
2721 /* ----------------------------------------------------------------------------------------*/
2722 
2723 EXTERN_C_BEGIN
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2726 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2727 {
2728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2733   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 EXTERN_C_END
2737 
2738 EXTERN_C_BEGIN
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2741 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2742 {
2743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2748   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 EXTERN_C_END
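
/*
   Usage sketch (editor's note, not part of the original source): MatStoreValues()/MatRetrieveValues()
   save and later restore the numerical values of an assembled matrix whose nonzero pattern does not
   change, e.g. when the same matrix is refilled every time step.  The MAT_NEW_NONZERO_LOCATIONS option
   shown below reflects the usual requirement that no new nonzero locations be introduced; treat it as
   an assumption to check against this PETSc version.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);        save the current values
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);     later: restore them after they have been overwritten
*/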
2752 
2753 #include "petscpc.h"
2754 EXTERN_C_BEGIN
2755 #undef __FUNCT__
2756 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2758 {
2759   Mat_MPIAIJ     *b;
2760   PetscErrorCode ierr;
2761   PetscInt       i;
2762 
2763   PetscFunctionBegin;
2764   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2765   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2766   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2767   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2768 
2769   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2770   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2771   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2773   if (d_nnz) {
2774     for (i=0; i<B->rmap->n; i++) {
2775       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2776     }
2777   }
2778   if (o_nnz) {
2779     for (i=0; i<B->rmap->n; i++) {
2780       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2781     }
2782   }
2783   b = (Mat_MPIAIJ*)B->data;
2784 
2785   if (!B->preallocated) {
2786     /* Explicitly create 2 MATSEQAIJ matrices. */
2787     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2788     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2789     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2791     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2792     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2793     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2794     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2795   }
2796 
2797   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2798   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2799   B->preallocated = PETSC_TRUE;
2800   PetscFunctionReturn(0);
2801 }
2802 EXTERN_C_END
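
/*
   Editor's sketch (an assumption-labelled illustration, not from the original source): the routine above
   preallocates the two sequential blocks b->A (columns owned by this process) and b->B (all remaining
   columns).  For a square tridiagonal matrix with the default layout, per-row counts could be built as
   follows, where rstart/rend delimit the rows this process will own, M is the global size, and
   d_nnz/o_nnz are PetscInt arrays of length rend-rstart:

     PetscInt row,d,o;
     for (row=rstart; row<rend; row++) {
       d = 1; o = 0;
       if (row-1 >= 0) { if (row-1 >= rstart) d++; else o++; }
       if (row+1 <  M) { if (row+1 <  rend)   d++; else o++; }
       d_nnz[row-rstart] = d;
       o_nnz[row-rstart] = o;
     }
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/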
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2806 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2807 {
2808   Mat            mat;
2809   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   *newmat       = 0;
2814   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2815   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2816   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2817   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2818   a    = (Mat_MPIAIJ*)mat->data;
2819 
2820   mat->factor       = matin->factor;
2821   mat->rmap->bs      = matin->rmap->bs;
2822   mat->assembled    = PETSC_TRUE;
2823   mat->insertmode   = NOT_SET_VALUES;
2824   mat->preallocated = PETSC_TRUE;
2825 
2826   a->size           = oldmat->size;
2827   a->rank           = oldmat->rank;
2828   a->donotstash     = oldmat->donotstash;
2829   a->roworiented    = oldmat->roworiented;
2830   a->rowindices     = 0;
2831   a->rowvalues      = 0;
2832   a->getrowactive   = PETSC_FALSE;
2833 
2834   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2835   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2836 
2837   if (oldmat->colmap) {
2838 #if defined (PETSC_USE_CTABLE)
2839     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2840 #else
2841     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2842     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2843     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2844 #endif
2845   } else a->colmap = 0;
2846   if (oldmat->garray) {
2847     PetscInt len;
2848     len  = oldmat->B->cmap->n;
2849     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2850     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2851     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2852   } else a->garray = 0;
2853 
2854   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2855   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2856   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2857   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2858   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2859   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2860   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2861   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2862   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2863   *newmat = mat;
2864   PetscFunctionReturn(0);
2865 }
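
/*
   Usage sketch (editor's note): MatDuplicate() on an assembled MPIAIJ matrix copies the parallel layout
   and nonzero pattern, and optionally the numerical values.

     Mat B,C;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);           pattern and values copied
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&C);CHKERRQ(ierr);    pattern only, values are zero
     ierr = MatDestroy(B);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/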
2866 
2867 #include "petscsys.h"
2868 
2869 #undef __FUNCT__
2870 #define __FUNCT__ "MatLoad_MPIAIJ"
2871 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2872 {
2873   Mat            A;
2874   PetscScalar    *vals,*svals;
2875   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2876   MPI_Status     status;
2877   PetscErrorCode ierr;
2878   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2879   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2880   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2881   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2882   PetscInt       cend,cstart,n,*rowners;
2883   int            fd;
2884 
2885   PetscFunctionBegin;
2886   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2887   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2888   if (!rank) {
2889     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2890     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2891     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2892   }
2893 
2894   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2895   M = header[1]; N = header[2];
2896   /* determine ownership of all rows */
2897   m    = M/size + ((M % size) > rank);
2898   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2899   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2900 
2901   /* First process needs enough room for process with most rows */
2902   if (!rank) {
2903     mmax       = rowners[1];
2904     for (i=2; i<=size; i++) {
2905       mmax = PetscMax(mmax,rowners[i]);
2906     }
2907   } else mmax = m;
2908 
2909   rowners[0] = 0;
2910   for (i=2; i<=size; i++) {
2911     rowners[i] += rowners[i-1];
2912   }
2913   rstart = rowners[rank];
2914   rend   = rowners[rank+1];
2915 
2916   /* distribute row lengths to all processors */
2917   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2918   if (!rank) {
2919     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2920     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2921     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2922     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2923     for (j=0; j<m; j++) {
2924       procsnz[0] += ourlens[j];
2925     }
2926     for (i=1; i<size; i++) {
2927       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2928       /* calculate the number of nonzeros on each processor */
2929       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2930         procsnz[i] += rowlengths[j];
2931       }
2932       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2933       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2934     }
2935     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2936   } else {
2937     mpicnt = PetscMPIIntCast(m);
2938     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2939   }
2940 
2941   if (!rank) {
2942     /* determine max buffer needed and allocate it */
2943     maxnz = 0;
2944     for (i=0; i<size; i++) {
2945       maxnz = PetscMax(maxnz,procsnz[i]);
2946     }
2947     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2948 
2949     /* read in my part of the matrix column indices  */
2950     nz   = procsnz[0];
2951     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2952     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2953 
2954     /* read in everyone else's and ship off */
2955     for (i=1; i<size; i++) {
2956       nz     = procsnz[i];
2957       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2958       mpicnt = PetscMPIIntCast(nz);
2959       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2960     }
2961     ierr = PetscFree(cols);CHKERRQ(ierr);
2962   } else {
2963     /* determine buffer space needed for message */
2964     nz = 0;
2965     for (i=0; i<m; i++) {
2966       nz += ourlens[i];
2967     }
2968     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2969 
2970     /* receive message of column indices */
2971     mpicnt = PetscMPIIntCast(nz);
2972     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2973     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2974     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2975     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2976     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2977   }
2978 
2979   /* determine column ownership if matrix is not square */
2980   if (N != M) {
2981     n      = N/size + ((N % size) > rank);
2982     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2983     cstart = cend - n;
2984   } else {
2985     cstart = rstart;
2986     cend   = rend;
2987     n      = cend - cstart;
2988   }
2989 
2990   /* loop over local rows, determining number of off diagonal entries */
2991   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2992   jj = 0;
2993   for (i=0; i<m; i++) {
2994     for (j=0; j<ourlens[i]; j++) {
2995       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2996       jj++;
2997     }
2998   }
2999 
3000   /* create our matrix */
3001   for (i=0; i<m; i++) {
3002     ourlens[i] -= offlens[i];
3003   }
3004   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3005   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
3006   ierr = MatSetType(A,type);CHKERRQ(ierr);
3007   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3008 
3009   for (i=0; i<m; i++) {
3010     ourlens[i] += offlens[i];
3011   }
3012 
3013   if (!rank) {
3014     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3015 
3016     /* read in my part of the matrix numerical values  */
3017     nz   = procsnz[0];
3018     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3019 
3020     /* insert into matrix */
3021     jj      = rstart;
3022     smycols = mycols;
3023     svals   = vals;
3024     for (i=0; i<m; i++) {
3025       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3026       smycols += ourlens[i];
3027       svals   += ourlens[i];
3028       jj++;
3029     }
3030 
3031     /* read in other processors and ship out */
3032     for (i=1; i<size; i++) {
3033       nz     = procsnz[i];
3034       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3035       mpicnt = PetscMPIIntCast(nz);
3036       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3037     }
3038     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3039   } else {
3040     /* receive numeric values */
3041     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3042 
3043     /* receive message of values*/
3044     mpicnt = PetscMPIIntCast(nz);
3045     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3046     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3047     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3048     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3049     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3050 
3051     /* insert into matrix */
3052     jj      = rstart;
3053     smycols = mycols;
3054     svals   = vals;
3055     for (i=0; i<m; i++) {
3056       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3057       smycols += ourlens[i];
3058       svals   += ourlens[i];
3059       jj++;
3060     }
3061   }
3062   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3063   ierr = PetscFree(vals);CHKERRQ(ierr);
3064   ierr = PetscFree(mycols);CHKERRQ(ierr);
3065   ierr = PetscFree(rowners);CHKERRQ(ierr);
3066 
3067   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3068   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3069   *newmat = A;
3070   PetscFunctionReturn(0);
3071 }
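
/*
   Usage sketch (editor's note): at this revision MatLoad() takes a binary viewer and the requested matrix
   type, matching the MatLoad_MPIAIJ() signature above.

     PetscViewer fd;
     Mat         A;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
     ierr = MatLoad(fd,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
*/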
3072 
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3075 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3076 {
3077   PetscErrorCode ierr;
3078   IS             iscol_local;
3079   PetscInt       csize;
3080 
3081   PetscFunctionBegin;
3082   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3083   if (call == MAT_REUSE_MATRIX) {
3084     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3085     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3086   } else {
3087     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3088   }
3089   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3090   if (call == MAT_INITIAL_MATRIX) {
3091     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3092     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3093   }
3094   PetscFunctionReturn(0);
3095 }
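
/*
   Usage sketch (editor's note; the calling sequence is assumed from MatGetSubMatrix_MPIAIJ() above, and
   nrows/ncols/first_row/first_col are placeholders): each process supplies an IS with the global rows it
   wants to own in the submatrix and an IS of global columns.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(PETSC_COMM_SELF,nrows,first_row,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,ncols,first_col,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);    refill after A changes
*/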
3096 
3097 #undef __FUNCT__
3098 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3099 /*
3100     Not great since it makes two copies of the submatrix: first a local SeqAIJ copy,
3101   and then the final result obtained by concatenating the local matrices.
3102   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3103 
3104   Note: This requires a sequential iscol with all indices.
3105 */
3106 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3107 {
3108   PetscErrorCode ierr;
3109   PetscMPIInt    rank,size;
3110   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3111   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3112   Mat            *local,M,Mreuse;
3113   MatScalar      *vwork,*aa;
3114   MPI_Comm       comm = ((PetscObject)mat)->comm;
3115   Mat_SeqAIJ     *aij;
3116 
3117 
3118   PetscFunctionBegin;
3119   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3120   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3121 
3122   if (call ==  MAT_REUSE_MATRIX) {
3123     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3124     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3125     local = &Mreuse;
3126     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3127   } else {
3128     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3129     Mreuse = *local;
3130     ierr   = PetscFree(local);CHKERRQ(ierr);
3131   }
3132 
3133   /*
3134       m - number of local rows
3135       n - number of columns (same on all processors)
3136       rstart - first row in new global matrix generated
3137   */
3138   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3139   if (call == MAT_INITIAL_MATRIX) {
3140     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3141     ii  = aij->i;
3142     jj  = aij->j;
3143 
3144     /*
3145         Determine the number of non-zeros in the diagonal and off-diagonal
3146         portions of the matrix in order to do correct preallocation
3147     */
3148 
3149     /* first get start and end of "diagonal" columns */
3150     if (csize == PETSC_DECIDE) {
3151       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3152       if (mglobal == n) { /* square matrix */
3153 	nlocal = m;
3154       } else {
3155         nlocal = n/size + ((n % size) > rank);
3156       }
3157     } else {
3158       nlocal = csize;
3159     }
3160     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3161     rstart = rend - nlocal;
3162     if (rank == size - 1 && rend != n) {
3163       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3164     }
3165 
3166     /* next, compute all the lengths */
3167     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3168     olens = dlens + m;
3169     for (i=0; i<m; i++) {
3170       jend = ii[i+1] - ii[i];
3171       olen = 0;
3172       dlen = 0;
3173       for (j=0; j<jend; j++) {
3174         if (*jj < rstart || *jj >= rend) olen++;
3175         else dlen++;
3176         jj++;
3177       }
3178       olens[i] = olen;
3179       dlens[i] = dlen;
3180     }
3181     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3182     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3183     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3184     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3185     ierr = PetscFree(dlens);CHKERRQ(ierr);
3186   } else {
3187     PetscInt ml,nl;
3188 
3189     M = *newmat;
3190     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3191     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3192     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3193     /*
3194          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3195        rather than the slower MatSetValues().
3196     */
3197     M->was_assembled = PETSC_TRUE;
3198     M->assembled     = PETSC_FALSE;
3199   }
3200   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3201   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3202   ii  = aij->i;
3203   jj  = aij->j;
3204   aa  = aij->a;
3205   for (i=0; i<m; i++) {
3206     row   = rstart + i;
3207     nz    = ii[i+1] - ii[i];
3208     cwork = jj;     jj += nz;
3209     vwork = aa;     aa += nz;
3210     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3211   }
3212 
3213   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3214   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3215   *newmat = M;
3216 
3217   /* save submatrix used in processor for next request */
3218   if (call ==  MAT_INITIAL_MATRIX) {
3219     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3220     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3221   }
3222 
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 EXTERN_C_BEGIN
3227 #undef __FUNCT__
3228 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3229 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3230 {
3231   PetscInt       m,cstart, cend,j,nnz,i,d;
3232   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3233   const PetscInt *JJ;
3234   PetscScalar    *values;
3235   PetscErrorCode ierr;
3236 
3237   PetscFunctionBegin;
3238   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3239 
3240   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3241   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3242   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3243   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3244   m      = B->rmap->n;
3245   cstart = B->cmap->rstart;
3246   cend   = B->cmap->rend;
3247   rstart = B->rmap->rstart;
3248 
3249   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3250 
3251 #if defined(PETSC_USE_DEBUG)
3252   for (i=0; i<m; i++) {
3253     nnz     = Ii[i+1]- Ii[i];
3254     JJ      = J + Ii[i];
3255     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3256     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3257     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3258   }
3259 #endif
3260 
3261   for (i=0; i<m; i++) {
3262     nnz     = Ii[i+1]- Ii[i];
3263     JJ      = J + Ii[i];
3264     nnz_max = PetscMax(nnz_max,nnz);
3265     d       = 0;
3266     for (j=0; j<nnz; j++) {
3267       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3268     }
3269     d_nnz[i] = d;
3270     o_nnz[i] = nnz - d;
3271   }
3272   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3273   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3274 
3275   if (v) values = (PetscScalar*)v;
3276   else {
3277     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3278     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3279   }
3280 
3281   for (i=0; i<m; i++) {
3282     ii   = i + rstart;
3283     nnz  = Ii[i+1]- Ii[i];
3284     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3285   }
3286   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3287   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288 
3289   if (!v) {
3290     ierr = PetscFree(values);CHKERRQ(ierr);
3291   }
3292   PetscFunctionReturn(0);
3293 }
3294 EXTERN_C_END
3295 
3296 #undef __FUNCT__
3297 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3298 /*@
3299    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3300    (the default parallel PETSc format).
3301 
3302    Collective on MPI_Comm
3303 
3304    Input Parameters:
3305 +  B - the matrix
3306 .  i - the indices into j for the start of each local row (starts with zero)
3307 .  j - the column indices for each local row (starts with zero)
3308 -  v - optional values in the matrix
3309 
3310    Level: developer
3311 
3312    Notes:
3313        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3314      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3315      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3316 
3317        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3318 
3319        The format used for the sparse matrix input is equivalent to a
3320     row-major ordering, i.e. for the following matrix, the input data expected is
3321     as shown:
3322 
3323         1 0 0
3324         2 0 3     P0
3325        -------
3326         4 5 6     P1
3327 
3328      Process0 [P0]: rows_owned=[0,1]
3329         i =  {0,1,3}  [size = nrow+1  = 2+1]
3330         j =  {0,0,2}  [size = nz = 3]
3331         v =  {1,2,3}  [size = nz = 3]
3332 
3333      Process1 [P1]: rows_owned=[2]
3334         i =  {0,3}    [size = nrow+1  = 1+1]
3335         j =  {0,1,2}  [size = nz = 3]
3336         v =  {4,5,6}  [size = nz = 3]
3337 
3338 .keywords: matrix, aij, compressed row, sparse, parallel
3339 
3340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3341           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3342 @*/
3343 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3344 {
3345   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3346 
3347   PetscFunctionBegin;
3348   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3349   if (f) {
3350     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3351   }
3352   PetscFunctionReturn(0);
3353 }
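
/*
   Usage sketch (editor's note) matching the P0 data from the manual page above: each process passes its
   own local CSR triplet; the row pointer is local (starts at 0) while the column indices are global.
   As the implementation above shows, the call also inserts the values and assembles the matrix.

     Mat         B;
     PetscInt    i[] = {0,1,3};                 2 local rows on process 0
     PetscInt    j[] = {0,0,2};                 global column indices
     PetscScalar v[] = {1.0,2.0,3.0};
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/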
3354 
3355 #undef __FUNCT__
3356 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3357 /*@C
3358    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3359    (the default parallel PETSc format).  For good matrix assembly performance
3360    the user should preallocate the matrix storage by setting the parameters
3361    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3362    performance can be increased by more than a factor of 50.
3363 
3364    Collective on MPI_Comm
3365 
3366    Input Parameters:
3367 +  A - the matrix
3368 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3369            (same value is used for all local rows)
3370 .  d_nnz - array containing the number of nonzeros in the various rows of the
3371            DIAGONAL portion of the local submatrix (possibly different for each row)
3372            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3373            The size of this array is equal to the number of local rows, i.e 'm'.
3374            You must leave room for the diagonal entry even if it is zero.
3375 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3376            submatrix (same value is used for all local rows).
3377 -  o_nnz - array containing the number of nonzeros in the various rows of the
3378            OFF-DIAGONAL portion of the local submatrix (possibly different for
3379            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3380            structure. The size of this array is equal to the number
3381            of local rows, i.e 'm'.
3382 
3383    If the *_nnz parameter is given then the *_nz parameter is ignored
3384 
3385    The AIJ format (also called the Yale sparse matrix format or
3386    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3387    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3388 
3389    The parallel matrix is partitioned such that the first m0 rows belong to
3390    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3391    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3392 
3393    The DIAGONAL portion of the local submatrix of a processor can be defined
3394    as the submatrix which is obtained by extracting the part corresponding
3395    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3396    first row that belongs to the processor, and r2 is the last row belonging
3397    to this processor. This is a square mxm matrix. The remaining portion
3398    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3399 
3400    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3401 
3402    You can call MatGetInfo() to get information on how effective the preallocation was;
3403    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3404    You can also run with the option -info and look for messages with the string
3405    malloc in them to see if additional memory allocation was needed.
3406 
3407    Example usage:
3408 
3409    Consider the following 8x8 matrix with 34 non-zero values, that is
3410    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3411    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3412    as follows:
3413 
3414 .vb
3415             1  2  0  |  0  3  0  |  0  4
3416     Proc0   0  5  6  |  7  0  0  |  8  0
3417             9  0 10  | 11  0  0  | 12  0
3418     -------------------------------------
3419            13  0 14  | 15 16 17  |  0  0
3420     Proc1   0 18  0  | 19 20 21  |  0  0
3421             0  0  0  | 22 23  0  | 24  0
3422     -------------------------------------
3423     Proc2  25 26 27  |  0  0 28  | 29  0
3424            30  0  0  | 31 32 33  |  0 34
3425 .ve
3426 
3427    This can be represented as a collection of submatrices as:
3428 
3429 .vb
3430       A B C
3431       D E F
3432       G H I
3433 .ve
3434 
3435    Where the submatrices A,B,C are owned by proc0, D,E,F are
3436    owned by proc1, G,H,I are owned by proc2.
3437 
3438    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3439    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3440    The 'M','N' parameters are 8,8, and have the same values on all procs.
3441 
3442    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3443    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3444    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3445    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3446    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3447    matrix, and [DF] as another SeqAIJ matrix.
3448 
3449    When d_nz, o_nz parameters are specified, d_nz storage elements are
3450    allocated for every row of the local diagonal submatrix, and o_nz
3451    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3452    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3453    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3454    In this case, the values of d_nz,o_nz are:
3455 .vb
3456      proc0 : dnz = 2, o_nz = 2
3457      proc1 : dnz = 3, o_nz = 2
3458      proc2 : dnz = 1, o_nz = 4
3459 .ve
3460    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3461    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3462    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3463    34 values.
3464 
3465    When d_nnz, o_nnz parameters are specified, the storage is specified
3466    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3467    In the above case the values for d_nnz,o_nnz are:
3468 .vb
3469      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3470      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3471      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3472 .ve
3473    Here the space allocated is the sum of all the above values, i.e. 34, and
3474    hence the preallocation is perfect.
3475 
3476    Level: intermediate
3477 
3478 .keywords: matrix, aij, compressed row, sparse, parallel
3479 
3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3481           MPIAIJ, MatGetInfo()
3482 @*/
3483 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3484 {
3485   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3486 
3487   PetscFunctionBegin;
3488   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3489   if (f) {
3490     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3491   }
3492   PetscFunctionReturn(0);
3493 }
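
/*
   Usage sketch (editor's note) using the per-row counts from the 8x8 example in the manual page above,
   written for proc0 (which owns 3 rows); the other processes pass their own counts analogously.

     Mat      A;
     PetscInt d_nnz[3] = {2,2,2};
     PetscInt o_nnz[3] = {2,2,2};
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     followed by MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd() as usual
*/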
3494 
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3497 /*@
3498      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3499          in standard CSR format.
3500 
3501    Collective on MPI_Comm
3502 
3503    Input Parameters:
3504 +  comm - MPI communicator
3505 .  m - number of local rows (Cannot be PETSC_DECIDE)
3506 .  n - This value should be the same as the local size used in creating the
3507        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3508        calculated if N is given) For square matrices n is almost always m.
3509 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3510 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3511 .   i - row indices
3512 .   j - column indices
3513 -   a - matrix values
3514 
3515    Output Parameter:
3516 .   mat - the matrix
3517 
3518    Level: intermediate
3519 
3520    Notes:
3521        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3522      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3523      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3524 
3525        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3526 
3527        The format used for the sparse matrix input is equivalent to a
3528     row-major ordering, i.e. for the following matrix, the input data expected is
3529     as shown:
3530 
3531         1 0 0
3532         2 0 3     P0
3533        -------
3534         4 5 6     P1
3535 
3536      Process0 [P0]: rows_owned=[0,1]
3537         i =  {0,1,3}  [size = nrow+1  = 2+1]
3538         j =  {0,0,2}  [size = nz = 3]
3539         v =  {1,2,3}  [size = nz = 3]
3540 
3541      Process1 [P1]: rows_owned=[2]
3542         i =  {0,3}    [size = nrow+1  = 1+1]
3543         j =  {0,1,2}  [size = nz = 3]
3544         v =  {4,5,6}  [size = nz = 3]
3545 
3546 .keywords: matrix, aij, compressed row, sparse, parallel
3547 
3548 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3549           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3550 @*/
3551 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3552 {
3553   PetscErrorCode ierr;
3554 
3555   PetscFunctionBegin;
3556   if (i[0]) {
3557     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3558   }
3559   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3560   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3561   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3562   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3563   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3564   PetscFunctionReturn(0);
3565 }
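
/*
   Usage sketch (editor's note) using the P0 data from the manual page above; process 1 passes its own
   i/j/a arrays in the same collective call.  The arrays are copied, so they may be freed afterwards.

     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
*/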
3566 
3567 #undef __FUNCT__
3568 #define __FUNCT__ "MatCreateMPIAIJ"
3569 /*@C
3570    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3571    (the default parallel PETSc format).  For good matrix assembly performance
3572    the user should preallocate the matrix storage by setting the parameters
3573    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3574    performance can be increased by more than a factor of 50.
3575 
3576    Collective on MPI_Comm
3577 
3578    Input Parameters:
3579 +  comm - MPI communicator
3580 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3581            This value should be the same as the local size used in creating the
3582            y vector for the matrix-vector product y = Ax.
3583 .  n - This value should be the same as the local size used in creating the
3584        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3585        calculated if N is given) For square matrices n is almost always m.
3586 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3587 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3588 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3589            (same value is used for all local rows)
3590 .  d_nnz - array containing the number of nonzeros in the various rows of the
3591            DIAGONAL portion of the local submatrix (possibly different for each row)
3592            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3593            The size of this array is equal to the number of local rows, i.e 'm'.
3594            You must leave room for the diagonal entry even if it is zero.
3595 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3596            submatrix (same value is used for all local rows).
3597 -  o_nnz - array containing the number of nonzeros in the various rows of the
3598            OFF-DIAGONAL portion of the local submatrix (possibly different for
3599            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3600            structure. The size of this array is equal to the number
3601            of local rows, i.e 'm'.
3602 
3603    Output Parameter:
3604 .  A - the matrix
3605 
3606    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3607    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3608    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3609 
3610    Notes:
3611    If the *_nnz parameter is given then the *_nz parameter is ignored
3612 
3613    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3614    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3615    storage requirements for this matrix.
3616 
3617    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3618    processor then it must be used on all processors that share the object for
3619    that argument.
3620 
3621    The user MUST specify either the local or global matrix dimensions
3622    (possibly both).
3623 
3624    The parallel matrix is partitioned across processors such that the
3625    first m0 rows belong to process 0, the next m1 rows belong to
3626    process 1, the next m2 rows belong to process 2 etc.. where
3627    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3628    values corresponding to [m x N] submatrix.
3629 
3630    The columns are logically partitioned with the n0 columns belonging
3631    to 0th partition, the next n1 columns belonging to the next
3632    partition etc., where n0,n1,n2... are the input parameter 'n'.
3633 
3634    The DIAGONAL portion of the local submatrix on any given processor
3635    is the submatrix corresponding to the rows and columns m,n
3636    corresponding to the given processor. i.e diagonal matrix on
3637    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3638    etc. The remaining portion of the local submatrix [m x (N-n)]
3639    constitute the OFF-DIAGONAL portion. The example below better
3640    illustrates this concept.
3641 
3642    For a square global matrix we define each processor's diagonal portion
3643    to be its local rows and the corresponding columns (a square submatrix);
3644    each processor's off-diagonal portion encompasses the remainder of the
3645    local matrix (a rectangular submatrix).
3646 
3647    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3648 
3649    When calling this routine with a single process communicator, a matrix of
3650    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3651    type of communicator, use the construction mechanism:
3652      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3653 
3654    By default, this format uses inodes (identical nodes) when possible.
3655    We search for consecutive rows with the same nonzero structure, thereby
3656    reusing matrix information to achieve increased efficiency.
3657 
3658    Options Database Keys:
3659 +  -mat_no_inode  - Do not use inodes
3660 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3661 -  -mat_aij_oneindex - Internally use indexing starting at 1
3662         rather than 0.  Note that when calling MatSetValues(),
3663         the user still MUST index entries starting at 0!
3664 
3665 
3666    Example usage:
3667 
3668    Consider the following 8x8 matrix with 34 non-zero values, that is
3669    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3670    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3671    as follows:
3672 
3673 .vb
3674             1  2  0  |  0  3  0  |  0  4
3675     Proc0   0  5  6  |  7  0  0  |  8  0
3676             9  0 10  | 11  0  0  | 12  0
3677     -------------------------------------
3678            13  0 14  | 15 16 17  |  0  0
3679     Proc1   0 18  0  | 19 20 21  |  0  0
3680             0  0  0  | 22 23  0  | 24  0
3681     -------------------------------------
3682     Proc2  25 26 27  |  0  0 28  | 29  0
3683            30  0  0  | 31 32 33  |  0 34
3684 .ve
3685 
3686    This can be represented as a collection of submatrices as:
3687 
3688 .vb
3689       A B C
3690       D E F
3691       G H I
3692 .ve
3693 
3694    Where the submatrices A,B,C are owned by proc0, D,E,F are
3695    owned by proc1, G,H,I are owned by proc2.
3696 
3697    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3698    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3699    The 'M','N' parameters are 8,8, and have the same values on all procs.
3700 
3701    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3702    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3703    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3704    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3705    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3706    matrix, and [DF] as another SeqAIJ matrix.
3707 
3708    When d_nz, o_nz parameters are specified, d_nz storage elements are
3709    allocated for every row of the local diagonal submatrix, and o_nz
3710    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3711    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3712    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3713    In this case, the values of d_nz,o_nz are:
3714 .vb
3715      proc0 : dnz = 2, o_nz = 2
3716      proc1 : dnz = 3, o_nz = 2
3717      proc2 : dnz = 1, o_nz = 4
3718 .ve
3719    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3720    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3721    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3722    34 values.
3723 
3724    When d_nnz, o_nnz parameters are specified, the storage is specified
3725    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3726    In the above case the values for d_nnz,o_nnz are:
3727 .vb
3728      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3729      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3730      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3731 .ve
3732    Here the space allocated is the sum of all the above values, i.e. 34, and
3733    hence the preallocation is perfect.
3734 
3735    Level: intermediate
3736 
3737 .keywords: matrix, aij, compressed row, sparse, parallel
3738 
3739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3740           MPIAIJ, MatCreateMPIAIJWithArrays()
3741 @*/
3742 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3743 {
3744   PetscErrorCode ierr;
3745   PetscMPIInt    size;
3746 
3747   PetscFunctionBegin;
3748   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3749   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3750   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3751   if (size > 1) {
3752     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3753     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3754   } else {
3755     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3756     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3757   }
3758   PetscFunctionReturn(0);
3759 }
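
/*
   Usage sketch (editor's note): one-call creation followed by the usual assembly loop; ncols, cols and
   vals are placeholders for the caller's row data.  On a one-process communicator a SEQAIJ matrix is
   returned, as the implementation above shows.

     Mat      A;
     PetscInt rstart,rend,row;
     ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/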
3760 
3761 #undef __FUNCT__
3762 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3763 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3764 {
3765   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3766 
3767   PetscFunctionBegin;
3768   *Ad     = a->A;
3769   *Ao     = a->B;
3770   *colmap = a->garray;
3771   PetscFunctionReturn(0);
3772 }
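
/*
   Usage sketch (editor's note): access the two sequential blocks stored by the MPIAIJ format.  Ad holds
   the "diagonal" columns owned by this process, Ao the off-process columns, and colmap[k] is the global
   column number of local column k of Ao (the garray).  The blocks are owned by A and must not be
   destroyed by the caller.

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/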
3773 
3774 #undef __FUNCT__
3775 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3776 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3777 {
3778   PetscErrorCode ierr;
3779   PetscInt       i;
3780   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3781 
3782   PetscFunctionBegin;
3783   if (coloring->ctype == IS_COLORING_GLOBAL) {
3784     ISColoringValue *allcolors,*colors;
3785     ISColoring      ocoloring;
3786 
3787     /* set coloring for diagonal portion */
3788     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3789 
3790     /* set coloring for off-diagonal portion */
3791     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3792     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3793     for (i=0; i<a->B->cmap->n; i++) {
3794       colors[i] = allcolors[a->garray[i]];
3795     }
3796     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3797     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3798     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3799     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3800   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3801     ISColoringValue *colors;
3802     PetscInt        *larray;
3803     ISColoring      ocoloring;
3804 
3805     /* set coloring for diagonal portion */
3806     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3807     for (i=0; i<a->A->cmap->n; i++) {
3808       larray[i] = i + A->cmap->rstart;
3809     }
3810     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3811     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3812     for (i=0; i<a->A->cmap->n; i++) {
3813       colors[i] = coloring->colors[larray[i]];
3814     }
3815     ierr = PetscFree(larray);CHKERRQ(ierr);
3816     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3817     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3818     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3819 
3820     /* set coloring for off-diagonal portion */
3821     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3822     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3823     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3824     for (i=0; i<a->B->cmap->n; i++) {
3825       colors[i] = coloring->colors[larray[i]];
3826     }
3827     ierr = PetscFree(larray);CHKERRQ(ierr);
3828     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3829     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3830     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3831   } else {
3832     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3833   }
3834 
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 #if defined(PETSC_HAVE_ADIC)
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3841 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3842 {
3843   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3844   PetscErrorCode ierr;
3845 
3846   PetscFunctionBegin;
3847   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3848   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3849   PetscFunctionReturn(0);
3850 }
3851 #endif
3852 
3853 #undef __FUNCT__
3854 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3855 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3856 {
3857   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3858   PetscErrorCode ierr;
3859 
3860   PetscFunctionBegin;
3861   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3862   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3863   PetscFunctionReturn(0);
3864 }
3865 
3866 #undef __FUNCT__
3867 #define __FUNCT__ "MatMerge"
3868 /*@
3869       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3870                  matrices from each processor
3871 
3872     Collective on MPI_Comm
3873 
3874    Input Parameters:
3875 +    comm - the communicator the parallel matrix will live on
3876 .    inmat - the input sequential matrices
3877 .    n - number of local columns (or PETSC_DECIDE)
3878 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3879 
3880    Output Parameter:
3881 .    outmat - the parallel matrix generated
3882 
3883     Level: advanced
3884 
3885    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3886 
3887 @*/
3888 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3889 {
3890   PetscErrorCode ierr;
3891   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3892   PetscInt       *indx;
3893   PetscScalar    *values;
3894 
3895   PetscFunctionBegin;
3896   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3897   if (scall == MAT_INITIAL_MATRIX){
3898     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3899     if (n == PETSC_DECIDE){
3900       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3901     }
3902     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3903     rstart -= m;
3904 
3905     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3906     for (i=0;i<m;i++) {
3907       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3908       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3909       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3910     }
3911     /* This routine will ONLY return MPIAIJ type matrix */
3912     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3913     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3914     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3915     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3916     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3917 
3918   } else if (scall == MAT_REUSE_MATRIX){
3919     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3920   } else {
3921     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3922   }
3923 
3924   for (i=0;i<m;i++) {
3925     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3926     Ii    = i + rstart;
3927     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3928     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3929   }
3930   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3931   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3933 
3934   PetscFunctionReturn(0);
3935 }
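
/*
   Usage sketch (editor's note): every process contributes a sequential matrix with the same number of
   columns; MatMerge() stacks them by rows into one parallel MPIAIJ matrix and destroys the input, as the
   implementation above shows.  mlocal and nnz_per_row are placeholders for the caller's sizes.

     Mat seq,par;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,nnz_per_row,&seq);CHKERRQ(ierr);
     ... fill and assemble seq ...
     ierr = MatMerge(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
*/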
3936 
3937 #undef __FUNCT__
3938 #define __FUNCT__ "MatFileSplit"
3939 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3940 {
3941   PetscErrorCode    ierr;
3942   PetscMPIInt       rank;
3943   PetscInt          m,N,i,rstart,nnz;
3944   size_t            len;
3945   const PetscInt    *indx;
3946   PetscViewer       out;
3947   char              *name;
3948   Mat               B;
3949   const PetscScalar *values;
3950 
3951   PetscFunctionBegin;
3952   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3953   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3954   /* Should this be the type of the diagonal block of A? */
3955   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3956   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3957   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3958   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3959   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3960   for (i=0;i<m;i++) {
3961     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3962     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3963     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3964   }
3965   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3966   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3967 
3968   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3969   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3970   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3971   sprintf(name,"%s.%d",outfile,rank);
3972   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3973   ierr = PetscFree(name);CHKERRQ(ierr);
3974   ierr = MatView(B,out);CHKERRQ(ierr);
3975   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3976   ierr = MatDestroy(B);CHKERRQ(ierr);
3977   PetscFunctionReturn(0);
3978 }
3979 
3980 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3981 #undef __FUNCT__
3982 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3983 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3984 {
3985   PetscErrorCode       ierr;
3986   Mat_Merge_SeqsToMPI  *merge;
3987   PetscContainer       container;
3988 
3989   PetscFunctionBegin;
3990   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3991   if (container) {
3992     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4005     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4006     ierr = PetscFree(merge);CHKERRQ(ierr);
4007 
4008     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4009     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4010   }
4011 
4012   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4013   PetscFunctionReturn(0);
4014 }
4015 
4016 #include "../src/mat/utils/freespace.h"
4017 #include "petscbt.h"
4018 
4019 #undef __FUNCT__
4020 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4021 /*@C
4022       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4023                  matrices from each processor
4024 
4025     Collective on MPI_Comm
4026 
4027    Input Parameters:
4028 +    comm - the communicator the parallel matrix will live on
4029 .    seqmat - the input sequential matrices
4030 .    m - number of local rows (or PETSC_DECIDE)
4031 .    n - number of local columns (or PETSC_DECIDE)
4032 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4033 
4034    Output Parameter:
4035 .    mpimat - the parallel matrix generated
4036 
4037     Level: advanced
4038 
4039    Notes:
4040      The dimensions of the sequential matrix in each processor MUST be the same.
4041      The input seqmat is included in the container "MatMergeSeqsToMPI", and will be
4042      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4043 @*/
4044 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4045 {
4046   PetscErrorCode       ierr;
4047   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4048   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4049   PetscMPIInt          size,rank,taga,*len_s;
4050   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4051   PetscInt             proc,m;
4052   PetscInt             **buf_ri,**buf_rj;
4053   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4054   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4055   MPI_Request          *s_waits,*r_waits;
4056   MPI_Status           *status;
4057   MatScalar            *aa=a->a;
4058   MatScalar            **abuf_r,*ba_i;
4059   Mat_Merge_SeqsToMPI  *merge;
4060   PetscContainer       container;
4061 
4062   PetscFunctionBegin;
4063   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4064 
4065   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4066   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4067 
4068   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4069   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not possess an object container");
4070   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4071 
4072   bi     = merge->bi;
4073   bj     = merge->bj;
4074   buf_ri = merge->buf_ri;
4075   buf_rj = merge->buf_rj;
4076 
4077   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4078   owners = merge->rowmap->range;
4079   len_s  = merge->len_s;
4080 
4081   /* send and recv matrix values */
4082   /*-----------------------------*/
4083   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4084   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4085 
4086   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4087   for (proc=0,k=0; proc<size; proc++){
4088     if (!len_s[proc]) continue;
4089     i = owners[proc];
4090     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4091     k++;
4092   }
4093 
4094   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4095   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4096   ierr = PetscFree(status);CHKERRQ(ierr);
4097 
4098   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4099   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4100 
4101   /* insert mat values of mpimat */
4102   /*----------------------------*/
4103   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4104   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4105 
4106   for (k=0; k<merge->nrecv; k++){
4107     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4108     nrows = *(buf_ri_k[k]);
4109     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4110     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
4111   }
4112 
4113   /* set values of ba */
4114   m = merge->rowmap->n;
4115   for (i=0; i<m; i++) {
4116     arow = owners[rank] + i;
4117     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4118     bnzi = bi[i+1] - bi[i];
4119     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4120 
4121     /* add local non-zero vals of this proc's seqmat into ba */
4122     anzi = ai[arow+1] - ai[arow];
4123     aj   = a->j + ai[arow];
4124     aa   = a->a + ai[arow];
4125     nextaj = 0;
4126     for (j=0; nextaj<anzi; j++){
4127       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4128         ba_i[j] += aa[nextaj++];
4129       }
4130     }
4131 
4132     /* add received vals into ba */
4133     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4134       /* i-th row */
4135       if (i == *nextrow[k]) {
4136         anzi = *(nextai[k]+1) - *nextai[k];
4137         aj   = buf_rj[k] + *(nextai[k]);
4138         aa   = abuf_r[k] + *(nextai[k]);
4139         nextaj = 0;
4140         for (j=0; nextaj<anzi; j++){
4141           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4142             ba_i[j] += aa[nextaj++];
4143           }
4144         }
4145         nextrow[k]++; nextai[k]++;
4146       }
4147     }
4148     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4149   }
4150   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4151   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4152 
4153   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4154   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4155   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4156   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4157   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4158   PetscFunctionReturn(0);
4159 }
4160 
4161 #undef __FUNCT__
4162 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4163 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4164 {
4165   PetscErrorCode       ierr;
4166   Mat                  B_mpi;
4167   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4168   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4169   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4170   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4171   PetscInt             len,proc,*dnz,*onz;
4172   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4173   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4174   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4175   MPI_Status           *status;
4176   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4177   PetscBT              lnkbt;
4178   Mat_Merge_SeqsToMPI  *merge;
4179   PetscContainer       container;
4180 
4181   PetscFunctionBegin;
4182   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4183 
4184   /* make sure it is a PETSc comm */
4185   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4186   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4187   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4188 
4189   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4190   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4191 
4192   /* determine row ownership */
4193   /*---------------------------------------------------------*/
4194   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4195   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4196   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4197   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4198   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4199   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4200   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4201 
4202   m      = merge->rowmap->n;
4203   M      = merge->rowmap->N;
4204   owners = merge->rowmap->range;
4205 
4206   /* determine the number of messages to send, their lengths */
4207   /*---------------------------------------------------------*/
4208   len_s  = merge->len_s;
4209 
4210   len = 0;  /* length of buf_si[] */
4211   merge->nsend = 0;
4212   for (proc=0; proc<size; proc++){
4213     len_si[proc] = 0;
4214     if (proc == rank){
4215       len_s[proc] = 0;
4216     } else {
4217       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4218       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4219     }
4220     if (len_s[proc]) {
4221       merge->nsend++;
4222       nrows = 0;
4223       for (i=owners[proc]; i<owners[proc+1]; i++){
4224         if (ai[i+1] > ai[i]) nrows++;
4225       }
4226       len_si[proc] = 2*(nrows+1);
4227       len += len_si[proc];
4228     }
4229   }
4230 
4231   /* determine the number and length of messages to receive for ij-structure */
4232   /*-------------------------------------------------------------------------*/
4233   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4234   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4235 
4236   /* post the Irecv of j-structure */
4237   /*-------------------------------*/
4238   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4239   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4240 
4241   /* post the Isend of j-structure */
4242   /*--------------------------------*/
4243   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4244 
4245   for (proc=0, k=0; proc<size; proc++){
4246     if (!len_s[proc]) continue;
4247     i = owners[proc];
4248     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4249     k++;
4250   }
4251 
4252   /* receives and sends of j-structure are complete */
4253   /*------------------------------------------------*/
4254   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4255   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4256 
4257   /* send and recv i-structure */
4258   /*---------------------------*/
4259   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4260   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4261 
4262   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4263   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4264   for (proc=0,k=0; proc<size; proc++){
4265     if (!len_s[proc]) continue;
4266     /* form outgoing message for i-structure:
4267          buf_si[0]:                 nrows to be sent
4268                [1:nrows]:           row index (local to the receiving process)
4269                [nrows+1:2*nrows+1]: i-structure index
4270     */
4271     /*-------------------------------------------*/
4272     nrows = len_si[proc]/2 - 1;
4273     buf_si_i    = buf_si + nrows+1;
4274     buf_si[0]   = nrows;
4275     buf_si_i[0] = 0;
4276     nrows = 0;
4277     for (i=owners[proc]; i<owners[proc+1]; i++){
4278       anzi = ai[i+1] - ai[i];
4279       if (anzi) {
4280         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4281         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4282         nrows++;
4283       }
4284     }
4285     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4286     k++;
4287     buf_si += len_si[proc];
4288   }
4289 
4290   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4291   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4292 
4293   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4294   for (i=0; i<merge->nrecv; i++){
4295     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4296   }
4297 
4298   ierr = PetscFree(len_si);CHKERRQ(ierr);
4299   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4300   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4301   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4302   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4303   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4304   ierr = PetscFree(status);CHKERRQ(ierr);
4305 
4306   /* compute a local seq matrix in each processor */
4307   /*----------------------------------------------*/
4308   /* allocate bi array and free space for accumulating nonzero column info */
4309   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4310   bi[0] = 0;
4311 
4312   /* create and initialize a linked list */
4313   nlnk = N+1;
4314   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4315 
4316   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4317   /* number of nonzeros in the local rows of seqmat */
4318   len  = ai[owners[rank+1]] - ai[owners[rank]];
4319   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4320   current_space = free_space;
4321 
4322   /* determine symbolic info for each local row */
4323   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4324 
4325   for (k=0; k<merge->nrecv; k++){
4326     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4327     nrows = *buf_ri_k[k];
4328     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4329     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
4330   }
4331 
4332   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4333   len = 0;
4334   for (i=0;i<m;i++) {
4335     bnzi   = 0;
4336     /* add local non-zero cols of this proc's seqmat into lnk */
4337     arow   = owners[rank] + i;
4338     anzi   = ai[arow+1] - ai[arow];
4339     aj     = a->j + ai[arow];
4340     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4341     bnzi += nlnk;
4342     /* add received col data into lnk */
4343     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4344       if (i == *nextrow[k]) { /* i-th row */
4345         anzi = *(nextai[k]+1) - *nextai[k];
4346         aj   = buf_rj[k] + *nextai[k];
4347         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4348         bnzi += nlnk;
4349         nextrow[k]++; nextai[k]++;
4350       }
4351     }
4352     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4353 
4354     /* if free space is not available, make more free space */
4355     if (current_space->local_remaining<bnzi) {
4356       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4357       nspacedouble++;
4358     }
4359     /* copy data into free space, then initialize lnk */
4360     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4361     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4362 
4363     current_space->array           += bnzi;
4364     current_space->local_used      += bnzi;
4365     current_space->local_remaining -= bnzi;
4366 
4367     bi[i+1] = bi[i] + bnzi;
4368   }
4369 
4370   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4371 
4372   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4373   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4374   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4375 
4376   /* create symbolic parallel matrix B_mpi */
4377   /*---------------------------------------*/
4378   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4379   if (n==PETSC_DECIDE) {
4380     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4381   } else {
4382     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4383   }
4384   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4385   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4386   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4387 
4388   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4389   B_mpi->assembled     = PETSC_FALSE;
4390   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4391   merge->bi            = bi;
4392   merge->bj            = bj;
4393   merge->buf_ri        = buf_ri;
4394   merge->buf_rj        = buf_rj;
4395   merge->coi           = PETSC_NULL;
4396   merge->coj           = PETSC_NULL;
4397   merge->owners_co     = PETSC_NULL;
4398 
4399   /* attach the supporting struct to B_mpi for reuse */
4400   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4401   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4402   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4403   *mpimat = B_mpi;
4404 
4405   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4406   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4407   PetscFunctionReturn(0);
4408 }
4409 
4410 #undef __FUNCT__
4411 #define __FUNCT__ "MatMerge_SeqsToMPI"
4412 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4413 {
4414   PetscErrorCode   ierr;
4415 
4416   PetscFunctionBegin;
4417   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4418   if (scall == MAT_INITIAL_MATRIX){
4419     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4420   }
4421   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4422   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4423   PetscFunctionReturn(0);
4424 }
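/*
   A minimal usage sketch for MatMerge_SeqsToMPI() (not part of the library source; Aseq, m, and n
   are hypothetical placeholders supplied by the caller):

     Mat      Aseq,Ampi;
     PetscInt m,n;
     ... each process assembles its own MATSEQAIJ matrix Aseq with identical global dimensions,
         and chooses local sizes m,n (or PETSC_DECIDE) for the parallel matrix ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,m,n,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
     ... change numerical values (but not the nonzero pattern) of Aseq ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aseq,m,n,MAT_REUSE_MATRIX,&Ampi);CHKERRQ(ierr);
     ierr = MatDestroy(Ampi);CHKERRQ(ierr);

   With MAT_INITIAL_MATRIX both the symbolic and numeric phases run; with MAT_REUSE_MATRIX only
   MatMerge_SeqsToMPINumeric() runs, reusing the ij-structure kept in the attached container.
*/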
4425 
4426 #undef __FUNCT__
4427 #define __FUNCT__ "MatGetLocalMat"
4428 /*@
4429      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix
4430 
4431     Not Collective
4432 
4433    Input Parameters:
4434 +    A - the matrix
4435 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4436 
4437    Output Parameter:
4438 .    A_loc - the local sequential matrix generated
4439 
4440     Level: developer
4441 
4442 @*/
4443 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4444 {
4445   PetscErrorCode  ierr;
4446   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4447   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4448   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4449   MatScalar       *aa=a->a,*ba=b->a,*cam;
4450   PetscScalar     *ca;
4451   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4452   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4453 
4454   PetscFunctionBegin;
4455   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4456   if (scall == MAT_INITIAL_MATRIX){
4457     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4458     ci[0] = 0;
4459     for (i=0; i<am; i++){
4460       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4461     }
4462     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4463     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4464     k = 0;
4465     for (i=0; i<am; i++) {
4466       ncols_o = bi[i+1] - bi[i];
4467       ncols_d = ai[i+1] - ai[i];
4468       /* off-diagonal portion of A */
4469       for (jo=0; jo<ncols_o; jo++) {
4470         col = cmap[*bj];
4471         if (col >= cstart) break;
4472         cj[k]   = col; bj++;
4473         ca[k++] = *ba++;
4474       }
4475       /* diagonal portion of A */
4476       for (j=0; j<ncols_d; j++) {
4477         cj[k]   = cstart + *aj++;
4478         ca[k++] = *aa++;
4479       }
4480       /* off-diagonal portion of A */
4481       for (j=jo; j<ncols_o; j++) {
4482         cj[k]   = cmap[*bj++];
4483         ca[k++] = *ba++;
4484       }
4485     }
4486     /* put together the new matrix */
4487     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4488     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4489     /* Since these are PETSc arrays, change flags to free them as necessary. */
4490     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4491     mat->free_a  = PETSC_TRUE;
4492     mat->free_ij = PETSC_TRUE;
4493     mat->nonew   = 0;
4494   } else if (scall == MAT_REUSE_MATRIX){
4495     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4496     ci = mat->i; cj = mat->j; cam = mat->a;
4497     for (i=0; i<am; i++) {
4498       /* off-diagonal portion of A */
4499       ncols_o = bi[i+1] - bi[i];
4500       for (jo=0; jo<ncols_o; jo++) {
4501         col = cmap[*bj];
4502         if (col >= cstart) break;
4503         *cam++ = *ba++; bj++;
4504       }
4505       /* diagonal portion of A */
4506       ncols_d = ai[i+1] - ai[i];
4507       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4508       /* off-diagonal portion of A */
4509       for (j=jo; j<ncols_o; j++) {
4510         *cam++ = *ba++; bj++;
4511       }
4512     }
4513   } else {
4514     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4515   }
4516 
4517   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4518   PetscFunctionReturn(0);
4519 }
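/*
   A usage sketch for MatGetLocalMat() (illustrative only; A is assumed to be an assembled MPIAIJ
   matrix; the result A_loc has A->rmap->n rows and A->cmap->N columns):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc as an ordinary SeqAIJ matrix ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call only refreshes the numerical values; it assumes the nonzero pattern
   of A is unchanged.
*/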
4520 
4521 #undef __FUNCT__
4522 #define __FUNCT__ "MatGetLocalMatCondensed"
4523 /*@C
4524      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of a parallel matrix, keeping only its NON-ZERO columns
4525 
4526     Not Collective
4527 
4528    Input Parameters:
4529 +    A - the matrix
4530 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4531 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4532 
4533    Output Parameter:
4534 .    A_loc - the local sequential matrix generated
4535 
4536     Level: developer
4537 
4538 @*/
4539 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4540 {
4541   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4542   PetscErrorCode    ierr;
4543   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4544   IS                isrowa,iscola;
4545   Mat               *aloc;
4546 
4547   PetscFunctionBegin;
4548   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4549   if (!row){
4550     start = A->rmap->rstart; end = A->rmap->rend;
4551     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4552   } else {
4553     isrowa = *row;
4554   }
4555   if (!col){
4556     start = A->cmap->rstart;
4557     cmap  = a->garray;
4558     nzA   = a->A->cmap->n;
4559     nzB   = a->B->cmap->n;
4560     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4561     ncols = 0;
4562     for (i=0; i<nzB; i++) {
4563       if (cmap[i] < start) idx[ncols++] = cmap[i];
4564       else break;
4565     }
4566     imark = i;
4567     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4568     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4569     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4570     ierr = PetscFree(idx);CHKERRQ(ierr);
4571   } else {
4572     iscola = *col;
4573   }
4574   if (scall != MAT_INITIAL_MATRIX){
4575     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4576     aloc[0] = *A_loc;
4577   }
4578   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4579   *A_loc = aloc[0];
4580   ierr = PetscFree(aloc);CHKERRQ(ierr);
4581   if (!row){
4582     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4583   }
4584   if (!col){
4585     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4586   }
4587   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4588   PetscFunctionReturn(0);
4589 }
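/*
   A usage sketch for MatGetLocalMatCondensed() (illustrative only; A is assumed to be an assembled
   MPIAIJ matrix; passing PETSC_NULL for row and col lets the routine build the index sets itself):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ... A_loc holds the local rows of A restricted to the columns that are nonzero on this process ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/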
4590 
4591 #undef __FUNCT__
4592 #define __FUNCT__ "MatGetBrowsOfAcols"
4593 /*@C
4594     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4595 
4596     Collective on Mat
4597 
4598    Input Parameters:
4599 +    A,B - the matrices in mpiaij format
4600 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4601 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4602 
4603    Output Parameter:
4604 +    rowb, colb - index sets of rows and columns of B to extract
4605 .    brstart - row index in B_seq at which B's locally owned rows (the next B->rmap->n rows) begin
4606 -    B_seq - the sequential matrix generated
4607 
4608     Level: developer
4609 
4610 @*/
4611 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4612 {
4613   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4614   PetscErrorCode    ierr;
4615   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4616   IS                isrowb,iscolb;
4617   Mat               *bseq;
4618 
4619   PetscFunctionBegin;
4620   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4621     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4622   }
4623   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4624 
4625   if (scall == MAT_INITIAL_MATRIX){
4626     start = A->cmap->rstart;
4627     cmap  = a->garray;
4628     nzA   = a->A->cmap->n;
4629     nzB   = a->B->cmap->n;
4630     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4631     ncols = 0;
4632     for (i=0; i<nzB; i++) {  /* row < local row index */
4633       if (cmap[i] < start) idx[ncols++] = cmap[i];
4634       else break;
4635     }
4636     imark = i;
4637     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4638     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4639     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4640     ierr = PetscFree(idx);CHKERRQ(ierr);
4641     *brstart = imark;
4642     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4643   } else {
4644     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4645     isrowb = *rowb; iscolb = *colb;
4646     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4647     bseq[0] = *B_seq;
4648   }
4649   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4650   *B_seq = bseq[0];
4651   ierr = PetscFree(bseq);CHKERRQ(ierr);
4652   if (!rowb){
4653     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4654   } else {
4655     *rowb = isrowb;
4656   }
4657   if (!colb){
4658     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4659   } else {
4660     *colb = iscolb;
4661   }
4662   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4663   PetscFunctionReturn(0);
4664 }
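/*
   A usage sketch for MatGetBrowsOfAcols() (illustrative only; A and B are assumed MPIAIJ matrices
   whose layouts satisfy the compatibility check above):

     Mat      B_seq;
     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... use B_seq; after B changes numerically (same nonzero pattern) ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/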
4665 
4666 #undef __FUNCT__
4667 #define __FUNCT__ "MatGetBrowsOfAoCols"
4668 /*@C
4669     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4670     of the OFF-DIAGONAL portion of the local part of A
4671 
4672     Collective on Mat
4673 
4674    Input Parameters:
4675 +    A,B - the matrices in mpiaij format
4676 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4677 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4678 .    startsj_r - similar to startsj for receives
4679 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4680 
4681    Output Parameter:
4682 .    B_oth - the sequential matrix generated
4683 
4684     Level: developer
4685 
4686 @*/
4687 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4688 {
4689   VecScatter_MPI_General *gen_to,*gen_from;
4690   PetscErrorCode         ierr;
4691   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4692   Mat_SeqAIJ             *b_oth;
4693   VecScatter             ctx=a->Mvctx;
4694   MPI_Comm               comm=((PetscObject)ctx)->comm;
4695   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4696   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4697   PetscScalar            *rvalues,*svalues;
4698   MatScalar              *b_otha,*bufa,*bufA;
4699   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4700   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4701   MPI_Status             *sstatus,rstatus;
4702   PetscMPIInt            jj;
4703   PetscInt               *cols,sbs,rbs;
4704   PetscScalar            *vals;
4705 
4706   PetscFunctionBegin;
4707   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4708     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4709   }
4710   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4711   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4712 
4713   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4714   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4715   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
4716   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
4717   nrecvs   = gen_from->n;
4718   nsends   = gen_to->n;
4719 
4720   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4721   srow     = gen_to->indices;   /* local row index to be sent */
4722   sstarts  = gen_to->starts;
4723   sprocs   = gen_to->procs;
4724   sstatus  = gen_to->sstatus;
4725   sbs      = gen_to->bs;
4726   rstarts  = gen_from->starts;
4727   rprocs   = gen_from->procs;
4728   rbs      = gen_from->bs;
4729 
4730   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4731   if (scall == MAT_INITIAL_MATRIX){
4732     /* i-array */
4733     /*---------*/
4734     /*  post receives */
4735     for (i=0; i<nrecvs; i++){
4736       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4737       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4738       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4739     }
4740 
4741     /* pack the outgoing message */
4742     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4743     sstartsj[0] = 0;  rstartsj[0] = 0;
4744     len = 0; /* total length of j or a array to be sent */
4745     k = 0;
4746     for (i=0; i<nsends; i++){
4747       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4748       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4749       for (j=0; j<nrows; j++) {
4750         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4751         for (l=0; l<sbs; l++){
4752           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4753           rowlen[j*sbs+l] = ncols;
4754           len += ncols;
4755           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4756         }
4757         k++;
4758       }
4759       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4760       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4761     }
4762     /* recvs and sends of i-array are completed */
4763     i = nrecvs;
4764     while (i--) {
4765       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4766     }
4767     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4768 
4769     /* allocate buffers for sending j and a arrays */
4770     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4771     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4772 
4773     /* create i-array of B_oth */
4774     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4775     b_othi[0] = 0;
4776     len = 0; /* total length of j or a array to be received */
4777     k = 0;
4778     for (i=0; i<nrecvs; i++){
4779       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4780       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4781       for (j=0; j<nrows; j++) {
4782         b_othi[k+1] = b_othi[k] + rowlen[j];
4783         len += rowlen[j]; k++;
4784       }
4785       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4786     }
4787 
4788     /* allocate space for j and a arrays of B_oth */
4789     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4790     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4791 
4792     /* j-array */
4793     /*---------*/
4794     /*  post receives of j-array */
4795     for (i=0; i<nrecvs; i++){
4796       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4797       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4798     }
4799 
4800     /* pack the outgoing message j-array */
4801     k = 0;
4802     for (i=0; i<nsends; i++){
4803       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4804       bufJ = bufj+sstartsj[i];
4805       for (j=0; j<nrows; j++) {
4806         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4807         for (ll=0; ll<sbs; ll++){
4808           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4809           for (l=0; l<ncols; l++){
4810             *bufJ++ = cols[l];
4811           }
4812           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4813         }
4814       }
4815       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4816     }
4817 
4818     /* recvs and sends of j-array are completed */
4819     i = nrecvs;
4820     while (i--) {
4821       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4822     }
4823     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4824   } else if (scall == MAT_REUSE_MATRIX){
4825     sstartsj = *startsj;
4826     rstartsj = *startsj_r;
4827     bufa     = *bufa_ptr;
4828     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4829     b_otha   = b_oth->a;
4830   } else {
4831     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4832     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4833 
4834   /* a-array */
4835   /*---------*/
4836   /*  post receives of a-array */
4837   for (i=0; i<nrecvs; i++){
4838     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4839     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4840   }
4841 
4842   /* pack the outgoing message a-array */
4843   k = 0;
4844   for (i=0; i<nsends; i++){
4845     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4846     bufA = bufa+sstartsj[i];
4847     for (j=0; j<nrows; j++) {
4848       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4849       for (ll=0; ll<sbs; ll++){
4850         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4851         for (l=0; l<ncols; l++){
4852           *bufA++ = vals[l];
4853         }
4854         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4855       }
4856     }
4857     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4858   }
4859   /* recvs and sends of a-array are completed */
4860   i = nrecvs;
4861   while (i--) {
4862     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4863   }
4864   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4865   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4866 
4867   if (scall == MAT_INITIAL_MATRIX){
4868     /* put together the new matrix */
4869     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4870 
4871     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4872     /* Since these are PETSc arrays, change flags to free them as necessary. */
4873     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4874     b_oth->free_a  = PETSC_TRUE;
4875     b_oth->free_ij = PETSC_TRUE;
4876     b_oth->nonew   = 0;
4877 
4878     ierr = PetscFree(bufj);CHKERRQ(ierr);
4879     if (!startsj || !bufa_ptr){
4880       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4881       ierr = PetscFree(bufa);CHKERRQ(ierr);
4882     } else {
4883       *startsj   = sstartsj;
4884       *startsj_r = rstartsj;
4885       *bufa_ptr  = bufa;
4886     }
4887   }
4888   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4889   PetscFunctionReturn(0);
4890 }
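/*
   A usage sketch for MatGetBrowsOfAoCols() (illustrative only; A and B are assumed MPIAIJ matrices
   with compatible layouts; the buffers are kept by the caller so a later call can reuse them):

     Mat       B_oth;
     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... after the values of B change but its nonzero pattern does not ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... when done: free startsj, startsj_r, and bufa, and destroy B_oth ...
*/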
4891 
4892 #undef __FUNCT__
4893 #define __FUNCT__ "MatGetCommunicationStructs"
4894 /*@C
4895   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4896 
4897   Not Collective
4898 
4899   Input Parameters:
4900 . A - The matrix in mpiaij format
4901 
4902   Output Parameter:
4903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4904 . colmap - A map from global column index to local index into lvec
4905 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4906 
4907   Level: developer
4908 
4909 @*/
4910 #if defined (PETSC_USE_CTABLE)
4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4912 #else
4913 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4914 #endif
4915 {
4916   Mat_MPIAIJ *a;
4917 
4918   PetscFunctionBegin;
4919   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4920   PetscValidPointer(lvec, 2);
4921   PetscValidPointer(colmap, 3);
4922   PetscValidPointer(multScatter, 4);
4923   a = (Mat_MPIAIJ *) A->data;
4924   if (lvec) *lvec = a->lvec;
4925   if (colmap) *colmap = a->colmap;
4926   if (multScatter) *multScatter = a->Mvctx;
4927   PetscFunctionReturn(0);
4928 }
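/*
   A usage sketch for MatGetCommunicationStructs() (illustrative only; A is an assumed MPIAIJ matrix;
   the returned objects remain owned by A and must not be destroyed by the caller):

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/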
4929 
4930 EXTERN_C_BEGIN
4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4932 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4933 EXTERN_C_END
4934 
4935 #undef __FUNCT__
4936 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4937 /*
4938     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
4939 
4940                n                       p                          p
4941         (              )       (              )         (                  )
4942       m (      A       )  *  n (       B      )   =   m (         C        )
4943         (              )       (              )         (                  )
4944 
4945 */
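/*
    A quick dimension check of the identity used above: with A of size m x n and B of size n x p,
    A' is n x m and B' is p x n, so B'*A' is p x m and (B'*A')' is the desired m x p product A*B.
*/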
4946 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4947 {
4948   PetscErrorCode     ierr;
4949   Mat                At,Bt,Ct;
4950 
4951   PetscFunctionBegin;
4952   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4953   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4954   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4955   ierr = MatDestroy(At);CHKERRQ(ierr);
4956   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4957   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4958   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4959   PetscFunctionReturn(0);
4960 }
4961 
4962 #undef __FUNCT__
4963 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4964 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4965 {
4966   PetscErrorCode ierr;
4967   PetscInt       m=A->rmap->n,n=B->cmap->n;
4968   Mat            Cmat;
4969 
4970   PetscFunctionBegin;
4971   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4972   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4973   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4974   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4975   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4976   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4977   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4978   *C   = Cmat;
4979   PetscFunctionReturn(0);
4980 }
4981 
4982 /* ----------------------------------------------------------------*/
4983 #undef __FUNCT__
4984 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4985 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4986 {
4987   PetscErrorCode ierr;
4988 
4989   PetscFunctionBegin;
4990   if (scall == MAT_INITIAL_MATRIX){
4991     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4992   }
4993   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4994   PetscFunctionReturn(0);
4995 }
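/*
   A usage sketch for the dense*sparse product above (illustrative only; A is an assumed MPIDENSE
   matrix and B an MPIAIJ matrix with A->cmap->n == B->rmap->n; the result C is MPIDENSE):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... after the entries of A or B change ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/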
4996 
4997 EXTERN_C_BEGIN
4998 #if defined(PETSC_HAVE_MUMPS)
4999 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5000 #endif
5001 #if defined(PETSC_HAVE_PASTIX)
5002 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5003 #endif
5004 #if defined(PETSC_HAVE_SUPERLU_DIST)
5005 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5006 #endif
5007 #if defined(PETSC_HAVE_SPOOLES)
5008 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5009 #endif
5010 EXTERN_C_END
5011 
5012 /*MC
5013    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5014 
5015    Options Database Keys:
5016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5017 
5018   Level: beginner
5019 
5020 .seealso: MatCreateMPIAIJ()
5021 M*/
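/*
   A minimal creation sketch for a MATMPIAIJ matrix (illustrative only; the global size 100 and the
   preallocation estimates 5 and 2 are placeholders). The type can equivalently be selected with
   -mat_type mpiaij and MatSetFromOptions().

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
*/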
5022 
5023 EXTERN_C_BEGIN
5024 #undef __FUNCT__
5025 #define __FUNCT__ "MatCreate_MPIAIJ"
5026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5027 {
5028   Mat_MPIAIJ     *b;
5029   PetscErrorCode ierr;
5030   PetscMPIInt    size;
5031 
5032   PetscFunctionBegin;
5033   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5034 
5035   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5036   B->data         = (void*)b;
5037   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5038   B->rmap->bs     = 1;
5039   B->assembled    = PETSC_FALSE;
5040   B->mapping      = 0;
5041 
5042   B->insertmode   = NOT_SET_VALUES;
5043   b->size         = size;
5044   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5045 
5046   /* build cache for off array entries formed */
5047   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5048   b->donotstash  = PETSC_FALSE;
5049   b->colmap      = 0;
5050   b->garray      = 0;
5051   b->roworiented = PETSC_TRUE;
5052 
5053   /* stuff used for matrix vector multiply */
5054   b->lvec      = PETSC_NULL;
5055   b->Mvctx     = PETSC_NULL;
5056 
5057   /* stuff for MatGetRow() */
5058   b->rowindices   = 0;
5059   b->rowvalues    = 0;
5060   b->getrowactive = PETSC_FALSE;
5061 
5062 #if defined(PETSC_HAVE_SPOOLES)
5063   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5064                                      "MatGetFactor_mpiaij_spooles",
5065                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5066 #endif
5067 #if defined(PETSC_HAVE_MUMPS)
5068   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5069                                      "MatGetFactor_mpiaij_mumps",
5070                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5071 #endif
5072 #if defined(PETSC_HAVE_PASTIX)
5073   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5074 					   "MatGetFactor_mpiaij_pastix",
5075 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5076 #endif
5077 #if defined(PETSC_HAVE_SUPERLU_DIST)
5078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5079                                      "MatGetFactor_mpiaij_superlu_dist",
5080                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5081 #endif
5082   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5083                                      "MatStoreValues_MPIAIJ",
5084                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5085   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5086                                      "MatRetrieveValues_MPIAIJ",
5087                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5088   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5089 				     "MatGetDiagonalBlock_MPIAIJ",
5090                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5092 				     "MatIsTranspose_MPIAIJ",
5093 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5095 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5096 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5098 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5099 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5101 				     "MatDiagonalScaleLocal_MPIAIJ",
5102 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5104                                      "MatConvert_MPIAIJ_MPICSRPERM",
5105                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5107                                      "MatConvert_MPIAIJ_MPICRL",
5108                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5110                                      "MatConvert_MPIAIJ_MPISBAIJ",
5111                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5113                                      "MatMatMult_MPIDense_MPIAIJ",
5114                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5116                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5117                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5119                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5120                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5121   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5122   PetscFunctionReturn(0);
5123 }
5124 EXTERN_C_END
5125 
5126 #undef __FUNCT__
5127 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5128 /*@
5129      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5130          and "off-diagonal" part of the matrix in CSR format.
5131 
5132    Collective on MPI_Comm
5133 
5134    Input Parameters:
5135 +  comm - MPI communicator
5136 .  m - number of local rows (Cannot be PETSC_DECIDE)
5137 .  n - This value should be the same as the local size used in creating the
5138        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5139        it calculated if N is given). For square matrices n is almost always m.
5140 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5141 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5142 .   i - row indices for "diagonal" portion of matrix
5143 .   j - column indices
5144 .   a - matrix values
5145 .   oi - row indices for "off-diagonal" portion of matrix
5146 .   oj - column indices
5147 -   oa - matrix values
5148 
5149    Output Parameter:
5150 .   mat - the matrix
5151 
5152    Level: advanced
5153 
5154    Notes:
5155        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5156 
5157        The i and j indices are 0 based
5158 
5159        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5160 
5161        This sets local rows and cannot be used to set off-processor values.
5162 
5163        You cannot later use MatSetValues() to change values in this matrix.
5164 
5165 .keywords: matrix, aij, compressed row, sparse, parallel
5166 
5167 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5168           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5169 @*/
5170 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5171 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5172 {
5173   PetscErrorCode ierr;
5174   Mat_MPIAIJ     *maij;
5175 
5176   PetscFunctionBegin;
5177   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5178   if (i[0]) {
5179     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5180   }
5181   if (oi[0]) {
5182     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5183   }
5184   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5185   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5186   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5187   maij = (Mat_MPIAIJ*) (*mat)->data;
5188   maij->donotstash     = PETSC_TRUE;
5189   (*mat)->preallocated = PETSC_TRUE;
5190 
5191   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5192   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5193   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5194   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5195 
5196   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5197   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5198 
5199   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5201   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203 
5204   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5206   PetscFunctionReturn(0);
5207 }
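/*
   A worked sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only): two processes, each
   owning 2 rows and 2 columns of the 4x4 matrix

        1 0 | 2  0
        0 3 | 0  4
        ------------
        7 0 | 8  0
        0 9 | 0 10

   The "diagonal" arrays use local column indices while the "off-diagonal" arrays use global column
   indices, as suggested by the way the two SeqAIJ blocks are created above. Because the arrays are
   not copied, in real code they must remain valid for the lifetime of the matrix.

     rank 0:  PetscInt    i[]  = {0,1,2}, j[]  = {0,1};   PetscScalar a[]  = {1.0,3.0};
              PetscInt    oi[] = {0,1,2}, oj[] = {2,3};   PetscScalar oa[] = {2.0,4.0};
     rank 1:  PetscInt    i[]  = {0,1,2}, j[]  = {0,1};   PetscScalar a[]  = {8.0,10.0};
              PetscInt    oi[] = {0,1,2}, oj[] = {0,1};   PetscScalar oa[] = {7.0,9.0};

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/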
5208 
5209 /*
5210     Special version for direct calls from Fortran
5211 */
5212 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5213 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5214 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5215 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5216 #endif
5217 
5218 /* Change these macros so can be used in void function */
5219 #undef CHKERRQ
5220 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5221 #undef SETERRQ2
5222 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5223 #undef SETERRQ
5224 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5225 
5226 EXTERN_C_BEGIN
5227 #undef __FUNCT__
5228 #define __FUNCT__ "matsetvaluesmpiaij_"
5229 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5230 {
5231   Mat             mat = *mmat;
5232   PetscInt        m = *mm, n = *mn;
5233   InsertMode      addv = *maddv;
5234   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5235   PetscScalar     value;
5236   PetscErrorCode  ierr;
5237 
5238   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5239   if (mat->insertmode == NOT_SET_VALUES) {
5240     mat->insertmode = addv;
5241   }
5242 #if defined(PETSC_USE_DEBUG)
5243   else if (mat->insertmode != addv) {
5244     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5245   }
5246 #endif
5247   {
5248   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5249   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5250   PetscTruth      roworiented = aij->roworiented;
5251 
5252   /* Some Variables required in the macro */
5253   Mat             A = aij->A;
5254   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5255   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5256   MatScalar       *aa = a->a;
5257   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5258   Mat             B = aij->B;
5259   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5260   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5261   MatScalar       *ba = b->a;
5262 
5263   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5264   PetscInt        nonew = a->nonew;
5265   MatScalar       *ap1,*ap2;
5266 
5267   PetscFunctionBegin;
5268   for (i=0; i<m; i++) {
5269     if (im[i] < 0) continue;
5270 #if defined(PETSC_USE_DEBUG)
5271     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5272 #endif
5273     if (im[i] >= rstart && im[i] < rend) {
5274       row      = im[i] - rstart;
5275       lastcol1 = -1;
5276       rp1      = aj + ai[row];
5277       ap1      = aa + ai[row];
5278       rmax1    = aimax[row];
5279       nrow1    = ailen[row];
5280       low1     = 0;
5281       high1    = nrow1;
5282       lastcol2 = -1;
5283       rp2      = bj + bi[row];
5284       ap2      = ba + bi[row];
5285       rmax2    = bimax[row];
5286       nrow2    = bilen[row];
5287       low2     = 0;
5288       high2    = nrow2;
5289 
5290       for (j=0; j<n; j++) {
5291         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5292         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5293         if (in[j] >= cstart && in[j] < cend){
5294           col = in[j] - cstart;
5295           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5296         } else if (in[j] < 0) continue;
5297 #if defined(PETSC_USE_DEBUG)
5298         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5299 #endif
5300         else {
5301           if (mat->was_assembled) {
5302             if (!aij->colmap) {
5303               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5304             }
5305 #if defined (PETSC_USE_CTABLE)
5306             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5307 	    col--;
5308 #else
5309             col = aij->colmap[in[j]] - 1;
5310 #endif
5311             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5312               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5313               col =  in[j];
5314               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5315               B = aij->B;
5316               b = (Mat_SeqAIJ*)B->data;
5317               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5318               rp2      = bj + bi[row];
5319               ap2      = ba + bi[row];
5320               rmax2    = bimax[row];
5321               nrow2    = bilen[row];
5322               low2     = 0;
5323               high2    = nrow2;
5324               bm       = aij->B->rmap->n;
5325               ba = b->a;
5326             }
5327           } else col = in[j];
5328           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5329         }
5330       }
5331     } else {
5332       if (!aij->donotstash) {
5333         if (roworiented) {
5334           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5335         } else {
5336           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5337         }
5338       }
5339     }
5340   }}
5341   PetscFunctionReturnVoid();
5342 }
5343 EXTERN_C_END
5344 
5345