xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 81f96dcc196638099f4f8062b7ddb72e8b817c95)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to all processors */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the number of diagonal and off-diagonal entries in each row */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the number of diagonal and off-diagonal entries in each row */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
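/*
    Illustrative usage sketch (not compiled into the library): one way the routine
    above might be driven.  The names ExampleDistribute, gseq and mloc are
    hypothetical; only rank 0 needs gseq to hold the assembled SeqAIJ matrix, and
    mloc is the number of rows this rank is to own.
*/
#if 0
static PetscErrorCode ExampleDistribute(MPI_Comm comm,Mat gseq,PetscInt mloc)
{
  Mat            Adist;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* first call builds the parallel matrix and moves structure plus values */
  ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
  /* ... use Adist ...; if only the numerical values of gseq change, reuse the structure */
  ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
  ierr = MatDestroy(Adist);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif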
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
179 a slightly higher hash-table lookup cost; without it, it is not scalable (each
180 process holds an order-N integer array) but access is fast.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
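/*
    Sketch of how the colmap built above is consulted (mirroring the lookup done
    in MatSetValues_MPIAIJ() below); the variable names gcol and lcol are
    illustrative.  The map translates a global column number into a local index
    into the off-diagonal block B, yielding -1 when the column is not present.
*/
#if 0
{
  PetscInt       gcol = 0,lcol;
  PetscErrorCode ierr;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
  lcol--;                          /* keys and values are stored shifted up by one */
#else
  lcol = aij->colmap[gcol] - 1;    /* a zero entry means the column is absent */
#endif
  if (lcol < 0) { /* gcol is not a column of the off-diagonal part */ }
}
#endif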
204 
205 
206 #define CHUNKSIZE   15
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
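/*
    Worked example (illustrative numbers) of the ordering MatSetValuesRow_MPIAIJ()
    expects in v[]: suppose this process owns global columns [4,8) and the local
    row has nonzeros in global columns 1,3 (off-diagonal, left of the diagonal
    block), 5,6 (diagonal block) and 9 (off-diagonal, right).  Then v[] must hold
    the five values in exactly that increasing global column order; the three
    PetscMemcpy() calls above unpack them into B (left part), A and B (right part).
*/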
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some Variables required in the macro */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   if (v) PetscValidScalarPointer(v,6);
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
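/*
    Usage sketch (illustrative): because of the restriction enforced above, only
    locally owned rows may be queried.  Hypothetical example fetching a 2x2 block:
*/
#if 0
{
  PetscInt       rstart,rend,rows[2],cols[2];
  PetscScalar    vals[4];
  PetscErrorCode ierr;

  ierr    = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  rows[0] = rstart; rows[1] = rstart+1;   /* rows must lie in [rstart,rend) */
  cols[0] = 0;      cols[1] = 1;          /* columns may be any global indices */
  ierr    = MatGetValues(mat,2,rows,2,cols,vals);CHKERRQ(ierr);
}
#endif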
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselves, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled, so we can skip this step
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
544   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
545   PetscFunctionReturn(0);
546 }
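/*
    The stash machinery handled above lets a process set entries in rows it does
    not own.  Illustrative calling pattern (the row, column and value are
    hypothetical):
*/
#if 0
{
  PetscInt       row = 0,col = 0;          /* may be ANY global row/column, owned or not */
  PetscScalar    v   = 1.0;
  PetscErrorCode ierr;

  ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* ships stashed off-process entries */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   /* receives them and applies them locally */
}
#endif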
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
551 {
552   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
557   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatZeroRows_MPIAIJ"
563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
564 {
565   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
568   PetscInt       i,*owners = A->rmap->range;
569   PetscInt       *nprocs,j,idx,nsends,row;
570   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
571   PetscInt       *rvalues,count,base,slen,*source;
572   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
573   MPI_Comm       comm = ((PetscObject)A)->comm;
574   MPI_Request    *send_waits,*recv_waits;
575   MPI_Status     recv_status,*send_status;
576 #if defined(PETSC_DEBUG)
577   PetscTruth     found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   /* inform other processors of number of messages and max length*/
608   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
609 
610   /* post receives:   */
611   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
612   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
613   for (i=0; i<nrecvs; i++) {
614     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
615   }
616 
617   /* do sends:
618       1) starts[i] gives the starting index in svalues for stuff going to
619          the ith processor
620   */
621   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
622   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
623   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
624   starts[0] = 0;
625   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
626   for (i=0; i<N; i++) {
627     svalues[starts[owner[i]]++] = rows[i];
628   }
629 
630   starts[0] = 0;
631   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   count = 0;
633   for (i=0; i<size; i++) {
634     if (nprocs[2*i+1]) {
635       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
636     }
637   }
638   ierr = PetscFree(starts);CHKERRQ(ierr);
639 
640   base = owners[rank];
641 
642   /*  wait on receives */
643   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal we use separate
674      code so that MatSetValues() is not called once per diagonal entry, since each
675      such call may allocate new memory; many small mallocs would slow things down.
676 
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
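/*
    Usage sketch (illustrative): rows are given by global index and, as the
    communication above shows, need not be owned by the calling process.
    Hypothetical call placing 1.0 on the diagonal of two zeroed rows:
*/
#if 0
{
  PetscInt       rows[2] = {0,17};         /* hypothetical global row numbers */
  PetscErrorCode ierr;

  ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);
}
#endif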
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
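/*
    The multiply above overlaps communication with computation:

        y = A_d * x_local + B * x_ghost

    where A_d (a->A) is the diagonal block, B (a->B) is the off-diagonal block in
    compressed local column numbering, and x_ghost (a->lvec) is gathered from the
    other processes by the VecScatter while the local product A_d*x_local proceeds.
*/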
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually */
776     /* added into yy until the next line */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* values actually were received in the Begin() but we need to call this nop */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
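/*
    Usage sketch (illustrative): the routine above is normally reached through the
    public interface, e.g.
*/
#if 0
{
  PetscTruth     flg;
  PetscErrorCode ierr;

  ierr = MatIsTranspose(Amat,Bmat,1.e-10,&flg);CHKERRQ(ierr);
  if (flg) { /* Bmat equals the transpose of Amat to within the given tolerance */ }
}
#endif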
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_COOKIE;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
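/*
    Sketch of the file layout produced above (every block written with
    PetscBinaryWrite()):

        header[0]   MAT_FILE_COOKIE
        header[1]   global number of rows M
        header[2]   global number of columns N
        header[3]   total number of nonzeros summed over all processes
        M  PetscInt     row lengths
        nz PetscInt     column indices, row by row, in global numbering
        nz PetscScalar  numerical values, in the same order

    Illustrative way to produce such a file (the file name is hypothetical):
*/
#if 0
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(mat,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
}
#endif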
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscTruth        isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscTruth inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscTruth isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscTruth flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSOR_MPIAIJ"
1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscTruth     hasop;
1216 
1217   PetscFunctionBegin;
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if (flag == SOR_APPLY_UPPER) {
1223     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224     PetscFunctionReturn(0);
1225   }
1226 
1227   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1228     if (flag & SOR_ZERO_INITIAL_GUESS) {
1229       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1230       its--;
1231     }
1232 
1233     while (its--) {
1234       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236 
1237       /* update rhs: bb1 = bb - B*x */
1238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1240 
1241       /* local sweep */
1242       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1243     }
1244   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1245     if (flag & SOR_ZERO_INITIAL_GUESS) {
1246       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1247       its--;
1248     }
1249     while (its--) {
1250       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253       /* update rhs: bb1 = bb - B*x */
1254       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1255       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1256 
1257       /* local sweep */
1258       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1259     }
1260   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1261     if (flag & SOR_ZERO_INITIAL_GUESS) {
1262       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1263       its--;
1264     }
1265     while (its--) {
1266       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268 
1269       /* update rhs: bb1 = bb - B*x */
1270       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1271       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1272 
1273       /* local sweep */
1274       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1275     }
1276   }  else if (flag & SOR_EISENSTAT) {
1277     Vec         xx1;
1278 
1279     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1280     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1281 
1282     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     if (!mat->diag) {
1285       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1286       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1287     }
1288     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1289     if (hasop) {
1290       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1291     } else {
1292       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1293     }
1294     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1295 
1296     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1297 
1298     /* local sweep */
1299     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1300     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1301     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1302   } else {
1303     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1304   }
1305 
1306   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1307   PetscFunctionReturn(0);
1308 }
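
/*
   Usage sketch (editorial addition; illustrative only): MatSOR_MPIAIJ() is normally reached
   through the public MatSOR() interface, e.g. from PCSOR.  Each parallel iteration scatters
   the current iterate into mat->lvec, forms bb1 = bb - B*x with the off-diagonal block B,
   and runs the requested sequential sweep on the diagonal block A.  A minimal call, assuming
   an assembled MPIAIJ matrix A and conforming vectors b and x:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                   0.0,2,1,x);CHKERRQ(ierr);        (omega=1, fshift=0, its=2, lits=1)

   At the solver level the same path is typically exercised by -pc_type sor -pc_sor_local_symmetric.
*/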
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
       ierr = ISDestroy(growp);CHKERRQ(ierr); /* growp is no longer needed; avoid leaking it */
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is so much easier;
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
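
/*
   Usage sketch (editorial addition; illustrative only): MatPermute() takes the row and column
   permutations as index sets.  For MPIAIJ the row permutation is gathered onto every process
   with ISAllGather(), inverted, and the locally owned slice of the inverse is used to extract
   the permuted matrix as a submatrix.  Assuming rowp and colp are permutations of the global
   row and column numbering:

     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);

   Because the full permutation is replicated on every process, this routine is best suited to
   moderate global sizes.
*/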
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
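
/*
   Usage sketch (editorial addition; illustrative only): the five statistics gathered from the
   diagonal (A) and off-diagonal (B) blocks are summed locally and then either returned as-is
   (MAT_LOCAL) or reduced over the communicator with MPI_MAX or MPI_SUM.  For example:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g, nz allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/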
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SYMMETRIC:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   case MAT_STRUCTURALLY_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_HERMITIAN:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_SYMMETRY_ETERNAL:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   default:
1461     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetRow_MPIAIJ"
1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1469 {
1470   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1471   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1472   PetscErrorCode ierr;
1473   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1474   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1475   PetscInt       *cmap,*idx_p;
1476 
1477   PetscFunctionBegin;
1478   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1479   mat->getrowactive = PETSC_TRUE;
1480 
1481   if (!mat->rowvalues && (idx || v)) {
1482     /*
1483         allocate enough space to hold information from the longest row.
1484     */
1485     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1486     PetscInt   max = 1,tmp;
1487     for (i=0; i<matin->rmap->n; i++) {
1488       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1489       if (max < tmp) { max = tmp; }
1490     }
1491     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1492   }
1493 
1494   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1495   lrow = row - rstart;
1496 
1497   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1498   if (!v)   {pvA = 0; pvB = 0;}
1499   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1500   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1501   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1502   nztot = nzA + nzB;
1503 
1504   cmap  = mat->garray;
1505   if (v  || idx) {
1506     if (nztot) {
1507       /* Sort by increasing column numbers, assuming A and B already sorted */
1508       PetscInt imark = -1;
1509       if (v) {
1510         *v = v_p = mat->rowvalues;
1511         for (i=0; i<nzB; i++) {
1512           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1513           else break;
1514         }
1515         imark = i;
1516         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1517         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1518       }
1519       if (idx) {
1520         *idx = idx_p = mat->rowindices;
1521         if (imark > -1) {
1522           for (i=0; i<imark; i++) {
1523             idx_p[i] = cmap[cworkB[i]];
1524           }
1525         } else {
1526           for (i=0; i<nzB; i++) {
1527             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1528             else break;
1529           }
1530           imark = i;
1531         }
1532         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1533         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1534       }
1535     } else {
1536       if (idx) *idx = 0;
1537       if (v)   *v   = 0;
1538     }
1539   }
1540   *nz = nztot;
1541   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1542   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
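
/*
   Usage sketch (editorial addition; illustrative only): MatGetRow() on an MPIAIJ matrix merges
   the entries of a locally owned row from the diagonal and off-diagonal blocks into one buffer
   ordered by global column number.  Only rows in the local ownership range may be requested,
   and every MatGetRow() must be matched by MatRestoreRow():

     PetscInt          row,ncols,rstart,rend;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (use cols[0..ncols-1] and vals[0..ncols-1] here)
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/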
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1551 
1552   PetscFunctionBegin;
1553   if (!aij->getrowactive) {
1554     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1555   }
1556   aij->getrowactive = PETSC_FALSE;
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatNorm_MPIAIJ"
1562 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1563 {
1564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1565   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1566   PetscErrorCode ierr;
1567   PetscInt       i,j,cstart = mat->cmap->rstart;
1568   PetscReal      sum = 0.0;
1569   MatScalar      *v;
1570 
1571   PetscFunctionBegin;
1572   if (aij->size == 1) {
1573     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1574   } else {
1575     if (type == NORM_FROBENIUS) {
1576       v = amat->a;
1577       for (i=0; i<amat->nz; i++) {
1578 #if defined(PETSC_USE_COMPLEX)
1579         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1580 #else
1581         sum += (*v)*(*v); v++;
1582 #endif
1583       }
1584       v = bmat->a;
1585       for (i=0; i<bmat->nz; i++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1588 #else
1589         sum += (*v)*(*v); v++;
1590 #endif
1591       }
1592       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1593       *norm = sqrt(*norm);
1594     } else if (type == NORM_1) { /* max column norm */
1595       PetscReal *tmp,*tmp2;
1596       PetscInt  *jj,*garray = aij->garray;
1597       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1599       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1600       *norm = 0.0;
1601       v = amat->a; jj = amat->j;
1602       for (j=0; j<amat->nz; j++) {
1603         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1604       }
1605       v = bmat->a; jj = bmat->j;
1606       for (j=0; j<bmat->nz; j++) {
1607         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1608       }
1609       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1610       for (j=0; j<mat->cmap->N; j++) {
1611         if (tmp2[j] > *norm) *norm = tmp2[j];
1612       }
1613       ierr = PetscFree(tmp);CHKERRQ(ierr);
1614       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1615     } else if (type == NORM_INFINITY) { /* max row norm */
1616       PetscReal ntemp = 0.0;
1617       for (j=0; j<aij->A->rmap->n; j++) {
1618         v = amat->a + amat->i[j];
1619         sum = 0.0;
1620         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1621           sum += PetscAbsScalar(*v); v++;
1622         }
1623         v = bmat->a + bmat->i[j];
1624         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1625           sum += PetscAbsScalar(*v); v++;
1626         }
1627         if (sum > ntemp) ntemp = sum;
1628       }
1629       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1630     } else {
1631       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1632     }
1633   }
1634   PetscFunctionReturn(0);
1635 }
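
/*
   Usage sketch (editorial addition; illustrative only): the parallel norms are assembled from
   the two sequential blocks: NORM_FROBENIUS sums |a_ij|^2 over A and B and reduces with
   MPI_SUM before taking the square root, NORM_1 reduces per-column absolute sums, and
   NORM_INFINITY reduces per-row absolute sums with MPI_MAX.  NORM_2 is not supported for this
   format.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/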
1636 
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatTranspose_MPIAIJ"
1639 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1640 {
1641   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1642   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1643   PetscErrorCode ierr;
1644   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1645   PetscInt       cstart=A->cmap->rstart,ncol;
1646   Mat            B;
1647   MatScalar      *array;
1648 
1649   PetscFunctionBegin;
1650   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1651 
1652   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1653   ai = Aloc->i; aj = Aloc->j;
1654   bi = Bloc->i; bj = Bloc->j;
1655   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1656     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1657     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1658     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1659     for (i=0; i<ai[ma]; i++){
1660       d_nnz[aj[i]] ++;
1661       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1662     }
1663 
1664     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1665     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1666     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1667     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1668     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1669   } else {
1670     B = *matout;
1671   }
1672 
1673   /* copy over the A part */
1674   array = Aloc->a;
1675   row = A->rmap->rstart;
1676   for (i=0; i<ma; i++) {
1677     ncol = ai[i+1]-ai[i];
1678     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1679     row++; array += ncol; aj += ncol;
1680   }
1681   aj = Aloc->j;
1682   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1683 
1684   /* copy over the B part */
1685   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1686   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1687   array = Bloc->a;
1688   row = A->rmap->rstart;
1689   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1690   cols_tmp = cols;
1691   for (i=0; i<mb; i++) {
1692     ncol = bi[i+1]-bi[i];
1693     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1694     row++; array += ncol; cols_tmp += ncol;
1695   }
1696   ierr = PetscFree(cols);CHKERRQ(ierr);
1697 
1698   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1701     *matout = B;
1702   } else {
1703     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
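
/*
   Usage sketch (editorial addition; illustrative only): the transpose is assembled by
   reinserting each local row of the A and B blocks as a column of the result with
   MatSetValues(); the d_nnz counts of the diagonal block are used for preallocation and
   reused as an estimate of o_nnz to avoid extra communication.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   In-place transposition (passing &A with MAT_REUSE_MATRIX) is allowed only for square
   matrices.
*/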
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1711 {
1712   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1713   Mat            a = aij->A,b = aij->B;
1714   PetscErrorCode ierr;
1715   PetscInt       s1,s2,s3;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1719   if (rr) {
1720     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1721     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1722     /* Overlap communication with computation. */
1723     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1724   }
1725   if (ll) {
1726     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1727     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1728     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1729   }
1730   /* scale the diagonal block */
1731   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1732 
1733   if (rr) {
1734     /* Do a scatter end and then right scale the off-diagonal block */
1735     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1736     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1737   }
1738 
1739   PetscFunctionReturn(0);
1740 }
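
/*
   Usage sketch (editorial addition; illustrative only): computes diag(ll)*mat*diag(rr).  Left
   scaling is applied to both blocks directly; right scaling of the off-diagonal block needs
   the ghosted copy of rr, so the scatter into aij->lvec is overlapped with scaling of the
   diagonal block.  Either vector may be PETSC_NULL to skip that side:

     ierr = MatDiagonalScale(A,l,PETSC_NULL);CHKERRQ(ierr);        (left scaling only)
*/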
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1745 {
1746   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1751   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1752   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1759 {
1760   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1761   PetscErrorCode ierr;
1762 
1763   PetscFunctionBegin;
1764   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatEqual_MPIAIJ"
1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1771 {
1772   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1773   Mat            a,b,c,d;
1774   PetscTruth     flg;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   a = matA->A; b = matA->B;
1779   c = matB->A; d = matB->B;
1780 
1781   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1782   if (flg) {
1783     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1784   }
1785   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatCopy_MPIAIJ"
1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1792 {
1793   PetscErrorCode ierr;
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1795   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1796 
1797   PetscFunctionBegin;
1798   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1799   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1800     /* because of the column compression in the off-processor part of the matrix a->B,
1801        the number of columns in a->B and b->B may be different, hence we cannot call
1802        MatCopy() directly on the two parts. If need be, a copy more efficient than
1803        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1804        and then copying the submatrices */
1805     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1808     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1816 {
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatAXPY_MPIAIJ"
1826 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1827 {
1828   PetscErrorCode ierr;
1829   PetscInt       i;
1830   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1831   PetscBLASInt   bnz,one=1;
1832   Mat_SeqAIJ     *x,*y;
1833 
1834   PetscFunctionBegin;
1835   if (str == SAME_NONZERO_PATTERN) {
1836     PetscScalar alpha = a;
1837     x = (Mat_SeqAIJ *)xx->A->data;
1838     y = (Mat_SeqAIJ *)yy->A->data;
1839     bnz = PetscBLASIntCast(x->nz);
1840     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1841     x = (Mat_SeqAIJ *)xx->B->data;
1842     y = (Mat_SeqAIJ *)yy->B->data;
1843     bnz = PetscBLASIntCast(x->nz);
1844     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1845   } else if (str == SUBSET_NONZERO_PATTERN) {
1846     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1847 
1848     x = (Mat_SeqAIJ *)xx->B->data;
1849     y = (Mat_SeqAIJ *)yy->B->data;
1850     if (y->xtoy && y->XtoY != xx->B) {
1851       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1852       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1853     }
1854     if (!y->xtoy) { /* get xtoy */
1855       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1856       y->XtoY = xx->B;
1857       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1858     }
1859     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1860   } else {
1861     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1862   }
1863   PetscFunctionReturn(0);
1864 }
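
/*
   Usage sketch (editorial addition; illustrative only): computes Y = Y + a*X.  With
   SAME_NONZERO_PATTERN the update reduces to BLAS axpy calls on the stored values of the
   A and B blocks; with SUBSET_NONZERO_PATTERN a cached x-to-y index map is built for the
   off-diagonal block; any other structure falls back to MatAXPY_Basic().

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/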
1865 
1866 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatConjugate_MPIAIJ"
1870 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1871 {
1872 #if defined(PETSC_USE_COMPLEX)
1873   PetscErrorCode ierr;
1874   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1875 
1876   PetscFunctionBegin;
1877   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1878   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1879 #else
1880   PetscFunctionBegin;
1881 #endif
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatRealPart_MPIAIJ"
1887 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1888 {
1889   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1890   PetscErrorCode ierr;
1891 
1892   PetscFunctionBegin;
1893   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1894   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1900 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1901 {
1902   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1903   PetscErrorCode ierr;
1904 
1905   PetscFunctionBegin;
1906   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1907   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #ifdef PETSC_HAVE_PBGL
1912 
1913 #include <boost/parallel/mpi/bsp_process_group.hpp>
1914 #include <boost/graph/distributed/ilu_default_graph.hpp>
1915 #include <boost/graph/distributed/ilu_0_block.hpp>
1916 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1917 #include <boost/graph/distributed/petsc/interface.hpp>
1918 #include <boost/multi_array.hpp>
1919 #include <boost/parallel/distributed_property_map.hpp>
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1923 /*
1924   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1925 */
1926 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1927 {
1928   namespace petsc = boost::distributed::petsc;
1929 
1930   namespace graph_dist = boost::graph::distributed;
1931   using boost::graph::distributed::ilu_default::process_group_type;
1932   using boost::graph::ilu_permuted;
1933 
1934   PetscTruth      row_identity, col_identity;
1935   PetscContainer  c;
1936   PetscInt        m, n, M, N;
1937   PetscErrorCode  ierr;
1938 
1939   PetscFunctionBegin;
1940   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1941   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1942   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1943   if (!row_identity || !col_identity) {
1944     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1945   }
1946 
1947   process_group_type pg;
1948   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1949   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1950   lgraph_type&   level_graph = *lgraph_p;
1951   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1952 
1953   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1954   ilu_permuted(level_graph);
1955 
1956   /* put together the new matrix */
1957   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1958   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1959   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1960   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1961   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1962   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964 
1965   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1966   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1967   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1973 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1974 {
1975   PetscFunctionBegin;
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatSolve_MPIAIJ"
1981 /*
1982   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1983 */
1984 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1985 {
1986   namespace graph_dist = boost::graph::distributed;
1987 
1988   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1989   lgraph_type*   lgraph_p;
1990   PetscContainer c;
1991   PetscErrorCode ierr;
1992 
1993   PetscFunctionBegin;
1994   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1995   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1996   ierr = VecCopy(b, x);CHKERRQ(ierr);
1997 
1998   PetscScalar* array_x;
1999   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2000   PetscInt sx;
2001   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2002 
2003   PetscScalar* array_b;
2004   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2005   PetscInt sb;
2006   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2007 
2008   lgraph_type&   level_graph = *lgraph_p;
2009   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2010 
2011   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2012   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2013                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2014 
2015   typedef boost::iterator_property_map<array_ref_type::iterator,
2016                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2017   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2018                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2019 
2020   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2021 
2022   PetscFunctionReturn(0);
2023 }
2024 #endif
2025 
2026 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2027   PetscInt       nzlocal,nsends,nrecvs;
2028   PetscMPIInt    *send_rank,*recv_rank;
2029   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2030   PetscScalar    *sbuf_a,**rbuf_a;
2031   PetscErrorCode (*MatDestroy)(Mat);
2032 } Mat_Redundant;
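
/*
   Buffer layout assumed below (editorial note, derived from the packing code in
   MatGetRedundantMatrix_MPIAIJ()): for each sender, sbuf_j holds a row-pointer array of
   length (local rows)+1 followed by the global column indices of all local nonzeros, and
   sbuf_a holds the matching values; rbuf_j[i]/rbuf_a[i] store one such pair received from
   the i-th sending process.
*/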
2033 
2034 #undef __FUNCT__
2035 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2036 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2037 {
2038   PetscErrorCode       ierr;
2039   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2040   PetscInt             i;
2041 
2042   PetscFunctionBegin;
2043   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2044   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2046   for (i=0; i<redund->nrecvs; i++){
2047     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2048     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2049   }
2050   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2051   ierr = PetscFree(redund);CHKERRQ(ierr);
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatDestroy_MatRedundant"
2057 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2058 {
2059   PetscErrorCode  ierr;
2060   PetscContainer  container;
2061   Mat_Redundant   *redund=PETSC_NULL;
2062 
2063   PetscFunctionBegin;
2064   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2065   if (container) {
2066     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2067   } else {
2068     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2069   }
2070   A->ops->destroy = redund->MatDestroy;
2071   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2072   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2073   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2079 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2080 {
2081   PetscMPIInt    rank,size;
2082   MPI_Comm       comm=((PetscObject)mat)->comm;
2083   PetscErrorCode ierr;
2084   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2085   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2086   PetscInt       *rowrange=mat->rmap->range;
2087   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2088   Mat            A=aij->A,B=aij->B,C=*matredundant;
2089   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2090   PetscScalar    *sbuf_a;
2091   PetscInt       nzlocal=a->nz+b->nz;
2092   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2093   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2094   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2095   MatScalar      *aworkA,*aworkB;
2096   PetscScalar    *vals;
2097   PetscMPIInt    tag1,tag2,tag3,imdex;
2098   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2099                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2100   MPI_Status     recv_status,*send_status;
2101   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2102   PetscInt       **rbuf_j=PETSC_NULL;
2103   PetscScalar    **rbuf_a=PETSC_NULL;
2104   Mat_Redundant  *redund=PETSC_NULL;
2105   PetscContainer container;
2106 
2107   PetscFunctionBegin;
2108   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2110 
2111   if (reuse == MAT_REUSE_MATRIX) {
2112     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2113     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2114     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2115     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2116     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2117     if (container) {
2118       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2119     } else {
2120       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2121     }
2122     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2123 
2124     nsends    = redund->nsends;
2125     nrecvs    = redund->nrecvs;
2126     send_rank = redund->send_rank;
2127     recv_rank = redund->recv_rank;
2128     sbuf_nz   = redund->sbuf_nz;
2129     rbuf_nz   = redund->rbuf_nz;
2130     sbuf_j    = redund->sbuf_j;
2131     sbuf_a    = redund->sbuf_a;
2132     rbuf_j    = redund->rbuf_j;
2133     rbuf_a    = redund->rbuf_a;
2134   }
2135 
2136   if (reuse == MAT_INITIAL_MATRIX){
2137     PetscMPIInt  subrank,subsize;
2138     PetscInt     nleftover,np_subcomm;
2139     /* get the destination processors' id send_rank, nsends and nrecvs */
2140     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2141     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2142     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2143     np_subcomm = size/nsubcomm;
2144     nleftover  = size - nsubcomm*np_subcomm;
2145     nsends = 0; nrecvs = 0;
2146     for (i=0; i<size; i++){ /* i=rank*/
2147       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2148         send_rank[nsends] = i; nsends++;
2149         recv_rank[nrecvs++] = i;
2150       }
2151     }
2152     if (rank >= size - nleftover){/* this proc is a leftover processor */
2153       i = size-nleftover-1;
2154       j = 0;
2155       while (j < nsubcomm - nleftover){
2156         send_rank[nsends++] = i;
2157         i--; j++;
2158       }
2159     }
2160 
2161     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2162       for (i=0; i<nleftover; i++){
2163         recv_rank[nrecvs++] = size-nleftover+i;
2164       }
2165     }
2166 
2167     /* allocate sbuf_j, sbuf_a */
2168     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2169     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2170     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2171   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2172 
2173   /* copy mat's local entries into the buffers */
2174   if (reuse == MAT_INITIAL_MATRIX){
2175     rownz_max = 0;
2176     rptr = sbuf_j;
2177     cols = sbuf_j + rend-rstart + 1;
2178     vals = sbuf_a;
2179     rptr[0] = 0;
2180     for (i=0; i<rend-rstart; i++){
2181       row = i + rstart;
2182       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2183       ncols  = nzA + nzB;
2184       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2185       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2186       /* load the column indices for this row into cols */
2187       lwrite = 0;
2188       for (l=0; l<nzB; l++) {
2189         if ((ctmp = bmap[cworkB[l]]) < cstart){
2190           vals[lwrite]   = aworkB[l];
2191           cols[lwrite++] = ctmp;
2192         }
2193       }
2194       for (l=0; l<nzA; l++){
2195         vals[lwrite]   = aworkA[l];
2196         cols[lwrite++] = cstart + cworkA[l];
2197       }
2198       for (l=0; l<nzB; l++) {
2199         if ((ctmp = bmap[cworkB[l]]) >= cend){
2200           vals[lwrite]   = aworkB[l];
2201           cols[lwrite++] = ctmp;
2202         }
2203       }
2204       vals += ncols;
2205       cols += ncols;
2206       rptr[i+1] = rptr[i] + ncols;
2207       if (rownz_max < ncols) rownz_max = ncols;
2208     }
2209     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2210   } else { /* only copy matrix values into sbuf_a */
2211     rptr = sbuf_j;
2212     vals = sbuf_a;
2213     rptr[0] = 0;
2214     for (i=0; i<rend-rstart; i++){
2215       row = i + rstart;
2216       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2217       ncols  = nzA + nzB;
2218       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2219       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2220       lwrite = 0;
2221       for (l=0; l<nzB; l++) {
2222         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2223       }
2224       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2225       for (l=0; l<nzB; l++) {
2226         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2227       }
2228       vals += ncols;
2229       rptr[i+1] = rptr[i] + ncols;
2230     }
2231   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2232 
2233   /* send nzlocal to others, and recv other's nzlocal */
2234   /*--------------------------------------------------*/
2235   if (reuse == MAT_INITIAL_MATRIX){
2236     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2237     s_waits2 = s_waits3 + nsends;
2238     s_waits1 = s_waits2 + nsends;
2239     r_waits1 = s_waits1 + nsends;
2240     r_waits2 = r_waits1 + nrecvs;
2241     r_waits3 = r_waits2 + nrecvs;
2242   } else {
2243     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2244     r_waits3 = s_waits3 + nsends;
2245   }
2246 
2247   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2248   if (reuse == MAT_INITIAL_MATRIX){
2249     /* get new tags to keep the communication clean */
2250     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2252     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2253 
2254     /* post receives of other's nzlocal */
2255     for (i=0; i<nrecvs; i++){
2256       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2257     }
2258     /* send nzlocal to others */
2259     for (i=0; i<nsends; i++){
2260       sbuf_nz[i] = nzlocal;
2261       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2262     }
2263     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2264     count = nrecvs;
2265     while (count) {
2266       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2267       recv_rank[imdex] = recv_status.MPI_SOURCE;
2268       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2269       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2270 
2271       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2272       rbuf_nz[imdex] += i + 2;
2273       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2274       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2275       count--;
2276     }
2277     /* wait on sends of nzlocal */
2278     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2279     /* send mat->i,j to others, and recv from other's */
2280     /*------------------------------------------------*/
2281     for (i=0; i<nsends; i++){
2282       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2283       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2284     }
2285     /* wait on receives of mat->i,j */
2286     /*------------------------------*/
2287     count = nrecvs;
2288     while (count) {
2289       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2290       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2291       count--;
2292     }
2293     /* wait on sends of mat->i,j */
2294     /*---------------------------*/
2295     if (nsends) {
2296       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2297     }
2298   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2299 
2300   /* post receives, send and receive mat->a */
2301   /*----------------------------------------*/
2302   for (imdex=0; imdex<nrecvs; imdex++) {
2303     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2304   }
2305   for (i=0; i<nsends; i++){
2306     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2307   }
2308   count = nrecvs;
2309   while (count) {
2310     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2311     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2312     count--;
2313   }
2314   if (nsends) {
2315     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2316   }
2317 
2318   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2319 
2320   /* create redundant matrix */
2321   /*-------------------------*/
2322   if (reuse == MAT_INITIAL_MATRIX){
2323     /* compute rownz_max for preallocation */
2324     for (imdex=0; imdex<nrecvs; imdex++){
2325       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2326       rptr = rbuf_j[imdex];
2327       for (i=0; i<j; i++){
2328         ncols = rptr[i+1] - rptr[i];
2329         if (rownz_max < ncols) rownz_max = ncols;
2330       }
2331     }
2332 
2333     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2334     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2335     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2336     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2337     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338   } else {
2339     C = *matredundant;
2340   }
2341 
2342   /* insert local matrix entries */
2343   rptr = sbuf_j;
2344   cols = sbuf_j + rend-rstart + 1;
2345   vals = sbuf_a;
2346   for (i=0; i<rend-rstart; i++){
2347     row   = i + rstart;
2348     ncols = rptr[i+1] - rptr[i];
2349     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2350     vals += ncols;
2351     cols += ncols;
2352   }
2353   /* insert received matrix entries */
2354   for (imdex=0; imdex<nrecvs; imdex++){
2355     rstart = rowrange[recv_rank[imdex]];
2356     rend   = rowrange[recv_rank[imdex]+1];
2357     rptr = rbuf_j[imdex];
2358     cols = rbuf_j[imdex] + rend-rstart + 1;
2359     vals = rbuf_a[imdex];
2360     for (i=0; i<rend-rstart; i++){
2361       row   = i + rstart;
2362       ncols = rptr[i+1] - rptr[i];
2363       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2364       vals += ncols;
2365       cols += ncols;
2366     }
2367   }
2368   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2369   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2371   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2372   if (reuse == MAT_INITIAL_MATRIX){
2373     PetscContainer container;
2374     *matredundant = C;
2375     /* create a supporting struct and attach it to C for reuse */
2376     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2377     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2378     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2379     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2380     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2381 
2382     redund->nzlocal = nzlocal;
2383     redund->nsends  = nsends;
2384     redund->nrecvs  = nrecvs;
2385     redund->send_rank = send_rank;
2386     redund->recv_rank = recv_rank;
2387     redund->sbuf_nz = sbuf_nz;
2388     redund->rbuf_nz = rbuf_nz;
2389     redund->sbuf_j  = sbuf_j;
2390     redund->sbuf_a  = sbuf_a;
2391     redund->rbuf_j  = rbuf_j;
2392     redund->rbuf_a  = rbuf_a;
2393 
2394     redund->MatDestroy = C->ops->destroy;
2395     C->ops->destroy    = MatDestroy_MatRedundant;
2396   }
2397   PetscFunctionReturn(0);
2398 }
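
/*
   Usage sketch (editorial addition; illustrative only): every subcommunicator ends up with a
   complete copy of the parallel matrix.  The packed row pointers and column indices are
   exchanged only for MAT_INITIAL_MATRIX; subsequent calls with MAT_REUSE_MATRIX resend just
   the numerical values through the buffers cached in the "Mat_Redundant" container.
   Assuming subcomm and the local row count mloc_sub have already been set up (as done, for
   example, by PCREDUNDANT):

     Mat red;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mloc_sub,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
*/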
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2402 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2403 {
2404   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2405   PetscErrorCode ierr;
2406   PetscInt       i,*idxb = 0;
2407   PetscScalar    *va,*vb;
2408   Vec            vtmp;
2409 
2410   PetscFunctionBegin;
2411   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2412   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2413   if (idx) {
2414     for (i=0; i<A->rmap->n; i++) {
2415       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2416     }
2417   }
2418 
2419   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2420   if (idx) {
2421     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2422   }
2423   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2424   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2425 
2426   for (i=0; i<A->rmap->n; i++){
2427     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2428       va[i] = vb[i];
2429       if (idx) idx[i] = a->garray[idxb[i]];
2430     }
2431   }
2432 
2433   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2434   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2435   if (idxb) {
2436     ierr = PetscFree(idxb);CHKERRQ(ierr);
2437   }
2438   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2439   PetscFunctionReturn(0);
2440 }
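
/*
   Usage sketch (editorial addition; illustrative only): the row-wise maxima of |a_ij| are
   computed over the diagonal block first and then compared against the off-diagonal block,
   with column indices of the B block mapped back to global numbering through a->garray.
   The idx argument is optional:

     Vec v;                              (layout matching the rows of A)
     ierr = MatGetRowMaxAbs(A,v,PETSC_NULL);CHKERRQ(ierr);
*/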
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2444 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2445 {
2446   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2447   PetscErrorCode ierr;
2448   PetscInt       i,*idxb = 0;
2449   PetscScalar    *va,*vb;
2450   Vec            vtmp;
2451 
2452   PetscFunctionBegin;
2453   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2454   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2455   if (idx) {
2456     for (i=0; i<A->rmap->n; i++) {
2457       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2458     }
2459   }
2460 
2461   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2462   if (idx) {
2463     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2464   }
2465   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2466   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2467 
2468   for (i=0; i<A->rmap->n; i++){
2469     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2470       va[i] = vb[i];
2471       if (idx) idx[i] = a->garray[idxb[i]];
2472     }
2473   }
2474 
2475   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2476   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2477   if (idxb) {
2478     ierr = PetscFree(idxb);CHKERRQ(ierr);
2479   }
2480   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2486 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2487 {
2488   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2489   PetscInt       n      = A->rmap->n;
2490   PetscInt       cstart = A->cmap->rstart;
2491   PetscInt      *cmap   = mat->garray;
2492   PetscInt      *diagIdx, *offdiagIdx;
2493   Vec            diagV, offdiagV;
2494   PetscScalar   *a, *diagA, *offdiagA;
2495   PetscInt       r;
2496   PetscErrorCode ierr;
2497 
2498   PetscFunctionBegin;
2499   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2500   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2502   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2504   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2505   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2506   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2507   for(r = 0; r < n; ++r) {
2508     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2509       a[r]   = diagA[r];
2510       idx[r] = cstart + diagIdx[r];
2511     } else {
2512       a[r]   = offdiagA[r];
2513       idx[r] = cmap[offdiagIdx[r]];
2514     }
2515   }
2516   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2517   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2519   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2520   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2521   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 #undef __FUNCT__
2526 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2527 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2528 {
2529   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2530   PetscInt       n      = A->rmap->n;
2531   PetscInt       cstart = A->cmap->rstart;
2532   PetscInt      *cmap   = mat->garray;
2533   PetscInt      *diagIdx, *offdiagIdx;
2534   Vec            diagV, offdiagV;
2535   PetscScalar   *a, *diagA, *offdiagA;
2536   PetscInt       r;
2537   PetscErrorCode ierr;
2538 
2539   PetscFunctionBegin;
2540   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2541   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2543   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2545   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2546   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2547   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2548   for(r = 0; r < n; ++r) {
2549     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2550       a[r]   = diagA[r];
2551       idx[r] = cstart + diagIdx[r];
2552     } else {
2553       a[r]   = offdiagA[r];
2554       idx[r] = cmap[offdiagIdx[r]];
2555     }
2556   }
2557   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2558   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2560   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2561   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2562   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 #undef __FUNCT__
2567 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2568 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2569 {
2570   PetscErrorCode ierr;
2571   Mat            *dummy;
2572 
2573   PetscFunctionBegin;
2574   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2575   *newmat = *dummy;
2576   ierr = PetscFree(dummy);CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2581 /* -------------------------------------------------------------------*/
2582 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2583        MatGetRow_MPIAIJ,
2584        MatRestoreRow_MPIAIJ,
2585        MatMult_MPIAIJ,
2586 /* 4*/ MatMultAdd_MPIAIJ,
2587        MatMultTranspose_MPIAIJ,
2588        MatMultTransposeAdd_MPIAIJ,
2589 #ifdef PETSC_HAVE_PBGL
2590        MatSolve_MPIAIJ,
2591 #else
2592        0,
2593 #endif
2594        0,
2595        0,
2596 /*10*/ 0,
2597        0,
2598        0,
2599        MatSOR_MPIAIJ,
2600        MatTranspose_MPIAIJ,
2601 /*15*/ MatGetInfo_MPIAIJ,
2602        MatEqual_MPIAIJ,
2603        MatGetDiagonal_MPIAIJ,
2604        MatDiagonalScale_MPIAIJ,
2605        MatNorm_MPIAIJ,
2606 /*20*/ MatAssemblyBegin_MPIAIJ,
2607        MatAssemblyEnd_MPIAIJ,
2608        MatSetOption_MPIAIJ,
2609        MatZeroEntries_MPIAIJ,
2610 /*24*/ MatZeroRows_MPIAIJ,
2611        0,
2612 #ifdef PETSC_HAVE_PBGL
2613        0,
2614 #else
2615        0,
2616 #endif
2617        0,
2618        0,
2619 /*29*/ MatSetUpPreallocation_MPIAIJ,
2620 #ifdef PETSC_HAVE_PBGL
2621        0,
2622 #else
2623        0,
2624 #endif
2625        0,
2626        0,
2627        0,
2628 /*34*/ MatDuplicate_MPIAIJ,
2629        0,
2630        0,
2631        0,
2632        0,
2633 /*39*/ MatAXPY_MPIAIJ,
2634        MatGetSubMatrices_MPIAIJ,
2635        MatIncreaseOverlap_MPIAIJ,
2636        MatGetValues_MPIAIJ,
2637        MatCopy_MPIAIJ,
2638 /*44*/ MatGetRowMax_MPIAIJ,
2639        MatScale_MPIAIJ,
2640        0,
2641        0,
2642        0,
2643 /*49*/ MatSetBlockSize_MPIAIJ,
2644        0,
2645        0,
2646        0,
2647        0,
2648 /*54*/ MatFDColoringCreate_MPIAIJ,
2649        0,
2650        MatSetUnfactored_MPIAIJ,
2651        MatPermute_MPIAIJ,
2652        0,
2653 /*59*/ MatGetSubMatrix_MPIAIJ,
2654        MatDestroy_MPIAIJ,
2655        MatView_MPIAIJ,
2656        0,
2657        0,
2658 /*64*/ 0,
2659        0,
2660        0,
2661        0,
2662        0,
2663 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2664        MatGetRowMinAbs_MPIAIJ,
2665        0,
2666        MatSetColoring_MPIAIJ,
2667 #if defined(PETSC_HAVE_ADIC)
2668        MatSetValuesAdic_MPIAIJ,
2669 #else
2670        0,
2671 #endif
2672        MatSetValuesAdifor_MPIAIJ,
2673 /*75*/ MatFDColoringApply_AIJ,
2674        0,
2675        0,
2676        0,
2677        0,
2678 /*80*/ 0,
2679        0,
2680        0,
2681 /*83*/ MatLoad_MPIAIJ,
2682        0,
2683        0,
2684        0,
2685        0,
2686        0,
2687 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2688        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2689        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2690        MatPtAP_Basic,
2691        MatPtAPSymbolic_MPIAIJ,
2692 /*94*/ MatPtAPNumeric_MPIAIJ,
2693        0,
2694        0,
2695        0,
2696        0,
2697 /*99*/ 0,
2698        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2699        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2700        MatConjugate_MPIAIJ,
2701        0,
2702 /*104*/MatSetValuesRow_MPIAIJ,
2703        MatRealPart_MPIAIJ,
2704        MatImaginaryPart_MPIAIJ,
2705        0,
2706        0,
2707 /*109*/0,
2708        MatGetRedundantMatrix_MPIAIJ,
2709        MatGetRowMin_MPIAIJ,
2710        0,
2711        0,
2712 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2713        0,
2714        0,
2715        0,
2716        0,
2717        0
2718 };
2719 
2720 /* ----------------------------------------------------------------------------------------*/
2721 
2722 EXTERN_C_BEGIN
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2725 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2726 {
2727   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2728   PetscErrorCode ierr;
2729 
2730   PetscFunctionBegin;
2731   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2732   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2733   PetscFunctionReturn(0);
2734 }
2735 EXTERN_C_END
2736 
2737 EXTERN_C_BEGIN
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2740 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2741 {
2742   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2743   PetscErrorCode ierr;
2744 
2745   PetscFunctionBegin;
2746   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2747   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 EXTERN_C_END
2751 
2752 #include "petscpc.h"
2753 EXTERN_C_BEGIN
2754 #undef __FUNCT__
2755 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2756 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2757 {
2758   Mat_MPIAIJ     *b;
2759   PetscErrorCode ierr;
2760   PetscInt       i;
2761 
2762   PetscFunctionBegin;
2763   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2764   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2765   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2766   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2767 
2768   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2769   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2770   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2771   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2772   if (d_nnz) {
2773     for (i=0; i<B->rmap->n; i++) {
2774       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2775     }
2776   }
2777   if (o_nnz) {
2778     for (i=0; i<B->rmap->n; i++) {
2779       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2780     }
2781   }
2782   b = (Mat_MPIAIJ*)B->data;
2783 
2784   if (!B->preallocated) {
2785     /* Explicitly create 2 MATSEQAIJ matrices. */
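    /* b->A holds the "diagonal" block (local rows x local columns); b->B holds the
       "off-diagonal" block, created here with the full global column width and compacted
       to the columns actually used when the matrix is assembled */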
2786     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2787     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2788     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2789     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2790     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2791     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2792     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2793     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2794   }
2795 
2796   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2797   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2798   B->preallocated = PETSC_TRUE;
2799   PetscFunctionReturn(0);
2800 }
2801 EXTERN_C_END
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatDuplicate_MPIAIJ"
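/* Duplicates an MPIAIJ matrix: copies the layouts, column map, garray and scatter context,
   and duplicates the diagonal and off-diagonal SeqAIJ blocks (numerical values are copied
   only when cpvalues == MAT_COPY_VALUES). */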
2805 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2806 {
2807   Mat            mat;
2808   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2809   PetscErrorCode ierr;
2810 
2811   PetscFunctionBegin;
2812   *newmat       = 0;
2813   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2814   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2815   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2816   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2817   a    = (Mat_MPIAIJ*)mat->data;
2818 
2819   mat->factor       = matin->factor;
2820   mat->rmap->bs      = matin->rmap->bs;
2821   mat->assembled    = PETSC_TRUE;
2822   mat->insertmode   = NOT_SET_VALUES;
2823   mat->preallocated = PETSC_TRUE;
2824 
2825   a->size           = oldmat->size;
2826   a->rank           = oldmat->rank;
2827   a->donotstash     = oldmat->donotstash;
2828   a->roworiented    = oldmat->roworiented;
2829   a->rowindices     = 0;
2830   a->rowvalues      = 0;
2831   a->getrowactive   = PETSC_FALSE;
2832 
2833   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2834   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2835 
2836   if (oldmat->colmap) {
2837 #if defined (PETSC_USE_CTABLE)
2838     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2839 #else
2840     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2841     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2842     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2843 #endif
2844   } else a->colmap = 0;
2845   if (oldmat->garray) {
2846     PetscInt len;
2847     len  = oldmat->B->cmap->n;
2848     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2849     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2850     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2851   } else a->garray = 0;
2852 
2853   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2854   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2855   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2856   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2857   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2858   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2859   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2860   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2861   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2862   *newmat = mat;
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 #undef __FUNCT__
2867 #define __FUNCT__ "MatLoad_MPIAIJ"
2868 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2869 {
2870   Mat            A;
2871   PetscScalar    *vals,*svals;
2872   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2873   MPI_Status     status;
2874   PetscErrorCode ierr;
2875   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2876   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2877   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2878   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2879   PetscInt       cend,cstart,n,*rowners;
2880   int            fd;
2881 
2882   PetscFunctionBegin;
2883   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2884   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2885   if (!rank) {
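    /* PETSc binary matrix format: header[] = {MAT_FILE_COOKIE, M, N, total number of nonzeros},
       followed by all the row lengths, then all the column indices, then all the numerical values */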
2886     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2887     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2888     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2889   }
2890 
2891   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2892   M = header[1]; N = header[2];
2893   /* determine ownership of all rows */
2894   m    = M/size + ((M % size) > rank);
2895   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2896   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2897 
2898   /* First process needs enough room for process with most rows */
2899   if (!rank) {
2900     mmax       = rowners[1];
2901     for (i=2; i<=size; i++) {
2902       mmax = PetscMax(mmax,rowners[i]);
2903     }
2904   } else mmax = m;
2905 
2906   rowners[0] = 0;
2907   for (i=2; i<=size; i++) {
2908     rowners[i] += rowners[i-1];
2909   }
2910   rstart = rowners[rank];
2911   rend   = rowners[rank+1];
2912 
2913   /* distribute row lengths to all processors */
2914   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2915   if (!rank) {
2916     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2917     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2918     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2919     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2920     for (j=0; j<m; j++) {
2921       procsnz[0] += ourlens[j];
2922     }
2923     for (i=1; i<size; i++) {
2924       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2925       /* calculate the number of nonzeros on each processor */
2926       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2927         procsnz[i] += rowlengths[j];
2928       }
2929       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2930       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2931     }
2932     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2933   } else {
2934     mpicnt = PetscMPIIntCast(m);
2935     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2936   }
2937 
2938   if (!rank) {
2939     /* determine max buffer needed and allocate it */
2940     maxnz = 0;
2941     for (i=0; i<size; i++) {
2942       maxnz = PetscMax(maxnz,procsnz[i]);
2943     }
2944     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2945 
2946     /* read in my part of the matrix column indices  */
2947     nz   = procsnz[0];
2948     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2949     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2950 
2951     /* read in everyone else's and ship them off */
2952     for (i=1; i<size; i++) {
2953       nz     = procsnz[i];
2954       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2955       mpicnt = PetscMPIIntCast(nz);
2956       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2957     }
2958     ierr = PetscFree(cols);CHKERRQ(ierr);
2959   } else {
2960     /* determine buffer space needed for message */
2961     nz = 0;
2962     for (i=0; i<m; i++) {
2963       nz += ourlens[i];
2964     }
2965     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2966 
2967     /* receive message of column indices*/
2968     mpicnt = PetscMPIIntCast(nz);
2969     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2970     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2971     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2972     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2973     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2974   }
2975 
2976   /* determine column ownership if matrix is not square */
2977   if (N != M) {
2978     n      = N/size + ((N % size) > rank);
2979     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2980     cstart = cend - n;
2981   } else {
2982     cstart = rstart;
2983     cend   = rend;
2984     n      = cend - cstart;
2985   }
2986 
2987   /* loop over local rows, determining number of off diagonal entries */
2988   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2989   jj = 0;
2990   for (i=0; i<m; i++) {
2991     for (j=0; j<ourlens[i]; j++) {
2992       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2993       jj++;
2994     }
2995   }
2996 
2997   /* create our matrix */
2998   for (i=0; i<m; i++) {
2999     ourlens[i] -= offlens[i];
3000   }
3001   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3002   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
3003   ierr = MatSetType(A,type);CHKERRQ(ierr);
3004   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3005 
3006   for (i=0; i<m; i++) {
3007     ourlens[i] += offlens[i];
3008   }
3009 
3010   if (!rank) {
3011     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3012 
3013     /* read in my part of the matrix numerical values  */
3014     nz   = procsnz[0];
3015     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3016 
3017     /* insert into matrix */
3018     jj      = rstart;
3019     smycols = mycols;
3020     svals   = vals;
3021     for (i=0; i<m; i++) {
3022       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3023       smycols += ourlens[i];
3024       svals   += ourlens[i];
3025       jj++;
3026     }
3027 
3028     /* read in other processors and ship out */
3029     for (i=1; i<size; i++) {
3030       nz     = procsnz[i];
3031       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3032       mpicnt = PetscMPIIntCast(nz);
3033       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3034     }
3035     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3036   } else {
3037     /* receive numeric values */
3038     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3039 
3040     /* receive message of values*/
3041     mpicnt = PetscMPIIntCast(nz);
3042     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3043     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3044     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3045     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3046     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3047 
3048     /* insert into matrix */
3049     jj      = rstart;
3050     smycols = mycols;
3051     svals   = vals;
3052     for (i=0; i<m; i++) {
3053       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3054       smycols += ourlens[i];
3055       svals   += ourlens[i];
3056       jj++;
3057     }
3058   }
3059   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3060   ierr = PetscFree(vals);CHKERRQ(ierr);
3061   ierr = PetscFree(mycols);CHKERRQ(ierr);
3062   ierr = PetscFree(rowners);CHKERRQ(ierr);
3063 
3064   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3065   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3066   *newmat = A;
3067   PetscFunctionReturn(0);
3068 }
3069 
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3072 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3073 {
3074   PetscErrorCode ierr;
3075   IS             iscol_local;
3076   PetscInt       csize;
3077 
3078   PetscFunctionBegin;
3079   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3080   if (call == MAT_REUSE_MATRIX) {
3081     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3082     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3083   } else {
3084     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3085   }
3086   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3087   if (call == MAT_INITIAL_MATRIX) {
3088     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3089     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3090   }
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 #undef __FUNCT__
3095 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3096 /*
3097     Not great since it makes two copies of the submatrix: first a SeqAIJ
3098   in local, and then the end result obtained by concatenating the local matrices.
3099   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3100 
3101   Note: This requires a sequential iscol with all indices.
3102 */
3103 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3104 {
3105   PetscErrorCode ierr;
3106   PetscMPIInt    rank,size;
3107   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3108   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3109   Mat            *local,M,Mreuse;
3110   MatScalar      *vwork,*aa;
3111   MPI_Comm       comm = ((PetscObject)mat)->comm;
3112   Mat_SeqAIJ     *aij;
3113 
3114 
3115   PetscFunctionBegin;
3116   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3117   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3118 
3119   if (call ==  MAT_REUSE_MATRIX) {
3120     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3121     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3122     local = &Mreuse;
3123     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3124   } else {
3125     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3126     Mreuse = *local;
3127     ierr   = PetscFree(local);CHKERRQ(ierr);
3128   }
3129 
3130   /*
3131       m - number of local rows
3132       n - number of columns (same on all processors)
3133       rstart - first row in new global matrix generated
3134   */
3135   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3136   if (call == MAT_INITIAL_MATRIX) {
3137     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3138     ii  = aij->i;
3139     jj  = aij->j;
3140 
3141     /*
3142         Determine the number of non-zeros in the diagonal and off-diagonal
3143         portions of the matrix in order to do correct preallocation
3144     */
3145 
3146     /* first get start and end of "diagonal" columns */
3147     if (csize == PETSC_DECIDE) {
3148       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3149       if (mglobal == n) { /* square matrix */
3150 	nlocal = m;
3151       } else {
3152         nlocal = n/size + ((n % size) > rank);
3153       }
3154     } else {
3155       nlocal = csize;
3156     }
3157     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3158     rstart = rend - nlocal;
3159     if (rank == size - 1 && rend != n) {
3160       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3161     }
3162 
3163     /* next, compute all the lengths */
3164     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3165     olens = dlens + m;
3166     for (i=0; i<m; i++) {
3167       jend = ii[i+1] - ii[i];
3168       olen = 0;
3169       dlen = 0;
3170       for (j=0; j<jend; j++) {
3171         if (*jj < rstart || *jj >= rend) olen++;
3172         else dlen++;
3173         jj++;
3174       }
3175       olens[i] = olen;
3176       dlens[i] = dlen;
3177     }
3178     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3179     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3180     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3181     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3182     ierr = PetscFree(dlens);CHKERRQ(ierr);
3183   } else {
3184     PetscInt ml,nl;
3185 
3186     M = *newmat;
3187     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3188     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3189     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3190     /*
3191          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3192        rather than the slower MatSetValues().
3193     */
3194     M->was_assembled = PETSC_TRUE;
3195     M->assembled     = PETSC_FALSE;
3196   }
3197   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3198   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3199   ii  = aij->i;
3200   jj  = aij->j;
3201   aa  = aij->a;
3202   for (i=0; i<m; i++) {
3203     row   = rstart + i;
3204     nz    = ii[i+1] - ii[i];
3205     cwork = jj;     jj += nz;
3206     vwork = aa;     aa += nz;
3207     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3208   }
3209 
3210   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3211   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3212   *newmat = M;
3213 
3214   /* save submatrix used in processor for next request */
3215   if (call ==  MAT_INITIAL_MATRIX) {
3216     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3217     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3218   }
3219 
3220   PetscFunctionReturn(0);
3221 }
3222 
3223 EXTERN_C_BEGIN
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3226 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3227 {
3228   PetscInt       m,cstart, cend,j,nnz,i,d;
3229   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3230   const PetscInt *JJ;
3231   PetscScalar    *values;
3232   PetscErrorCode ierr;
3233 
3234   PetscFunctionBegin;
3235   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3236 
3237   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3238   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3239   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3240   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3241   m      = B->rmap->n;
3242   cstart = B->cmap->rstart;
3243   cend   = B->cmap->rend;
3244   rstart = B->rmap->rstart;
3245 
3246   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3247 
3248 #if defined(PETSC_USE_DEBUG)
3249   for (i=0; i<m; i++) {
3250     nnz     = Ii[i+1]- Ii[i];
3251     JJ      = J + Ii[i];
3252     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3253     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3254     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3255   }
3256 #endif
3257 
3258   for (i=0; i<m; i++) {
3259     nnz     = Ii[i+1]- Ii[i];
3260     JJ      = J + Ii[i];
3261     nnz_max = PetscMax(nnz_max,nnz);
3262     d       = 0;
3263     for (j=0; j<nnz; j++) {
3264       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3265     }
3266     d_nnz[i] = d;
3267     o_nnz[i] = nnz - d;
3268   }
3269   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3270   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3271 
3272   if (v) values = (PetscScalar*)v;
3273   else {
3274     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3275     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3276   }
3277 
3278   for (i=0; i<m; i++) {
3279     ii   = i + rstart;
3280     nnz  = Ii[i+1]- Ii[i];
3281     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3282   }
3283   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285 
3286   if (!v) {
3287     ierr = PetscFree(values);CHKERRQ(ierr);
3288   }
3289   PetscFunctionReturn(0);
3290 }
3291 EXTERN_C_END
3292 
3293 #undef __FUNCT__
3294 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3295 /*@
3296    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3297    (the default parallel PETSc format).
3298 
3299    Collective on MPI_Comm
3300 
3301    Input Parameters:
3302 +  B - the matrix
3303 .  i - the indices into j for the start of each local row (starts with zero)
3304 .  j - the column indices for each local row (starts with zero)
3305 -  v - optional values in the matrix
3306 
3307    Level: developer
3308 
3309    Notes:
3310        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3311      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3312      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3313 
3314        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3315 
3316        The format which is used for the sparse matrix input, is equivalent to a
3317     row-major ordering, i.e., for the following matrix, the input data expected is
3318     as shown:
3319 
3320         1 0 0
3321         2 0 3     P0
3322        -------
3323         4 5 6     P1
3324 
3325      Process0 [P0]: rows_owned=[0,1]
3326         i =  {0,1,3}  [size = nrow+1  = 2+1]
3327         j =  {0,0,2}  [size = nz = 3]
3328         v =  {1,2,3}  [size = nz = 3]
3329 
3330      Process1 [P1]: rows_owned=[2]
3331         i =  {0,3}    [size = nrow+1  = 1+1]
3332         j =  {0,1,2}  [size = nz = 3]
3333         v =  {4,5,6}  [size = nz = 3]
3334 
3335 .keywords: matrix, aij, compressed row, sparse, parallel
3336 
3337 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3338           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3339 @*/
3340 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3341 {
3342   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3343 
3344   PetscFunctionBegin;
3345   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3346   if (f) {
3347     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3348   }
3349   PetscFunctionReturn(0);
3350 }
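/*
   A minimal calling sketch (illustrative only, not part of the library) for the
   two-process example in the man page above; "rank" is assumed to come from
   MPI_Comm_rank() and the local arrays match the P0/P1 data shown there:

      Mat         B;
      PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};  PetscScalar v0[] = {1.0,2.0,3.0};
      PetscInt    i1[] = {0,3},   j1[] = {0,1,2};  PetscScalar v1[] = {4.0,5.0,6.0};

      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,rank ? i1 : i0,rank ? j1 : j0,rank ? v1 : v0);CHKERRQ(ierr);
*/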
3351 
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3354 /*@C
3355    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3356    (the default parallel PETSc format).  For good matrix assembly performance
3357    the user should preallocate the matrix storage by setting the parameters
3358    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3359    performance can be increased by more than a factor of 50.
3360 
3361    Collective on MPI_Comm
3362 
3363    Input Parameters:
3364 +  A - the matrix
3365 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3366            (same value is used for all local rows)
3367 .  d_nnz - array containing the number of nonzeros in the various rows of the
3368            DIAGONAL portion of the local submatrix (possibly different for each row)
3369            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3370            The size of this array is equal to the number of local rows, i.e 'm'.
3371            You must leave room for the diagonal entry even if it is zero.
3372 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3373            submatrix (same value is used for all local rows).
3374 -  o_nnz - array containing the number of nonzeros in the various rows of the
3375            OFF-DIAGONAL portion of the local submatrix (possibly different for
3376            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3377            structure. The size of this array is equal to the number
3378            of local rows, i.e 'm'.
3379 
3380    If the *_nnz parameter is given then the *_nz parameter is ignored
3381 
3382    The AIJ format (also called the Yale sparse matrix format or
3383    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3384    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3385 
3386    The parallel matrix is partitioned such that the first m0 rows belong to
3387    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3388    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3389 
3390    The DIAGONAL portion of the local submatrix of a processor can be defined
3391    as the submatrix which is obtained by extracting the part corresponding
3392    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3393    first row that belongs to the processor, and r2 is the last row belonging
3394    to this processor. This is a square mxm matrix. The remaining portion
3395    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3396 
3397    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3398 
3399    You can call MatGetInfo() to get information on how effective the preallocation was;
3400    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3401    You can also run with the option -info and look for messages with the string
3402    malloc in them to see if additional memory allocation was needed.
3403 
3404    Example usage:
3405 
3406    Consider the following 8x8 matrix with 34 non-zero values, that is
3407    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3408    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3409    as follows:
3410 
3411 .vb
3412             1  2  0  |  0  3  0  |  0  4
3413     Proc0   0  5  6  |  7  0  0  |  8  0
3414             9  0 10  | 11  0  0  | 12  0
3415     -------------------------------------
3416            13  0 14  | 15 16 17  |  0  0
3417     Proc1   0 18  0  | 19 20 21  |  0  0
3418             0  0  0  | 22 23  0  | 24  0
3419     -------------------------------------
3420     Proc2  25 26 27  |  0  0 28  | 29  0
3421            30  0  0  | 31 32 33  |  0 34
3422 .ve
3423 
3424    This can be represented as a collection of submatrices as:
3425 
3426 .vb
3427       A B C
3428       D E F
3429       G H I
3430 .ve
3431 
3432    Where the submatrices A,B,C are owned by proc0, D,E,F are
3433    owned by proc1, G,H,I are owned by proc2.
3434 
3435    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3436    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3437    The 'M','N' parameters are 8,8, and have the same values on all procs.
3438 
3439    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3440    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3441    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3442    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3443    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3444    matrix, and [DF] as another SeqAIJ matrix.
3445 
3446    When d_nz, o_nz parameters are specified, d_nz storage elements are
3447    allocated for every row of the local diagonal submatrix, and o_nz
3448    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3449    One way to choose d_nz and o_nz is to use the max nonzeros per local
3450    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3451    In this case, the values of d_nz,o_nz are:
3452 .vb
3453      proc0 : d_nz = 2, o_nz = 2
3454      proc1 : d_nz = 3, o_nz = 2
3455      proc2 : d_nz = 1, o_nz = 4
3456 .ve
3457    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3458    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3459    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3460    34 values.
3461 
3462    When d_nnz, o_nnz parameters are specified, the storage is specified
3463    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3464    In the above case the values for d_nnz,o_nnz are:
3465 .vb
3466      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3467      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3468      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3469 .ve
3470    Here the space allocated is the sum of all the above values, i.e., 34, and
3471    hence the preallocation is perfect.
3472 
3473    Level: intermediate
3474 
3475 .keywords: matrix, aij, compressed row, sparse, parallel
3476 
3477 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3478           MPIAIJ, MatGetInfo()
3479 @*/
3480 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3481 {
3482   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3483 
3484   PetscFunctionBegin;
3485   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3486   if (f) {
3487     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3488   }
3489   PetscFunctionReturn(0);
3490 }
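/*
   A brief calling sketch (illustrative only) matching the d_nnz/o_nnz values worked out
   in the man page above, as it would appear on proc0 of the 3-process 8x8 example:

      Mat      A;
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/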
3491 
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3494 /*@
3495      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3496          CSR format the local rows.
3497 
3498    Collective on MPI_Comm
3499 
3500    Input Parameters:
3501 +  comm - MPI communicator
3502 .  m - number of local rows (Cannot be PETSC_DECIDE)
3503 .  n - This value should be the same as the local size used in creating the
3504        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3505        calculated if N is given) For square matrices n is almost always m.
3506 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3507 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3508 .   i - row indices
3509 .   j - column indices
3510 -   a - matrix values
3511 
3512    Output Parameter:
3513 .   mat - the matrix
3514 
3515    Level: intermediate
3516 
3517    Notes:
3518        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3519      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3520      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3521 
3522        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3523 
3524        The format which is used for the sparse matrix input, is equivalent to a
3525     row-major ordering, i.e., for the following matrix, the input data expected is
3526     as shown:
3527 
3528         1 0 0
3529         2 0 3     P0
3530        -------
3531         4 5 6     P1
3532 
3533      Process0 [P0]: rows_owned=[0,1]
3534         i =  {0,1,3}  [size = nrow+1  = 2+1]
3535         j =  {0,0,2}  [size = nz = 3]
3536         v =  {1,2,3}  [size = nz = 3]
3537 
3538      Process1 [P1]: rows_owned=[2]
3539         i =  {0,3}    [size = nrow+1  = 1+1]
3540         j =  {0,1,2}  [size = nz = 3]
3541         v =  {4,5,6}  [size = nz = 3]
3542 
3543 .keywords: matrix, aij, compressed row, sparse, parallel
3544 
3545 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3546           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3547 @*/
3548 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3549 {
3550   PetscErrorCode ierr;
3551 
3552   PetscFunctionBegin;
3553   if (i[0]) {
3554     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3555   }
3556   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3557   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3558   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3559   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3560   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3561   PetscFunctionReturn(0);
3562 }
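/*
   One-call variant (sketch only) of the two-process example in the man page above;
   mlocal, ilocal, jlocal and alocal are hypothetical names for each process's local
   row count and CSR data:

      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,mlocal,PETSC_DECIDE,3,3,ilocal,jlocal,alocal,&B);CHKERRQ(ierr);
*/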
3563 
3564 #undef __FUNCT__
3565 #define __FUNCT__ "MatCreateMPIAIJ"
3566 /*@C
3567    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3568    (the default parallel PETSc format).  For good matrix assembly performance
3569    the user should preallocate the matrix storage by setting the parameters
3570    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3571    performance can be increased by more than a factor of 50.
3572 
3573    Collective on MPI_Comm
3574 
3575    Input Parameters:
3576 +  comm - MPI communicator
3577 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3578            This value should be the same as the local size used in creating the
3579            y vector for the matrix-vector product y = Ax.
3580 .  n - This value should be the same as the local size used in creating the
3581        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3582        calculated if N is given) For square matrices n is almost always m.
3583 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3584 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3585 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3586            (same value is used for all local rows)
3587 .  d_nnz - array containing the number of nonzeros in the various rows of the
3588            DIAGONAL portion of the local submatrix (possibly different for each row)
3589            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3590            The size of this array is equal to the number of local rows, i.e 'm'.
3591            You must leave room for the diagonal entry even if it is zero.
3592 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3593            submatrix (same value is used for all local rows).
3594 -  o_nnz - array containing the number of nonzeros in the various rows of the
3595            OFF-DIAGONAL portion of the local submatrix (possibly different for
3596            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3597            structure. The size of this array is equal to the number
3598            of local rows, i.e 'm'.
3599 
3600    Output Parameter:
3601 .  A - the matrix
3602 
3603    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3604    MatXXXXSetPreallocation() paradigm instead of this routine directly; a brief calling sketch follows the routine below.
3605    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3606 
3607    Notes:
3608    If the *_nnz parameter is given then the *_nz parameter is ignored
3609 
3610    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3611    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3612    storage requirements for this matrix.
3613 
3614    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3615    processor than it must be used on all processors that share the object for
3616    that argument.
3617 
3618    The user MUST specify either the local or global matrix dimensions
3619    (possibly both).
3620 
3621    The parallel matrix is partitioned across processors such that the
3622    first m0 rows belong to process 0, the next m1 rows belong to
3623    process 1, the next m2 rows belong to process 2 etc.. where
3624    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3625    values corresponding to [m x N] submatrix.
3626 
3627    The columns are logically partitioned with the n0 columns belonging
3628    to 0th partition, the next n1 columns belonging to the next
3629    partition etc., where n0,n1,n2,... are the input parameter 'n'.
3630 
3631    The DIAGONAL portion of the local submatrix on any given processor
3632    is the submatrix corresponding to the rows and columns m,n
3633    corresponding to the given processor. i.e diagonal matrix on
3634    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3635    etc. The remaining portion of the local submatrix [m x (N-n)]
3636    constitute the OFF-DIAGONAL portion. The example below better
3637    illustrates this concept.
3638 
3639    For a square global matrix we define each processor's diagonal portion
3640    to be its local rows and the corresponding columns (a square submatrix);
3641    each processor's off-diagonal portion encompasses the remainder of the
3642    local matrix (a rectangular submatrix).
3643 
3644    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3645 
3646    When calling this routine with a single process communicator, a matrix of
3647    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3648    type of communicator, use the construction mechanism:
3649      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3650 
3651    By default, this format uses inodes (identical nodes) when possible.
3652    We search for consecutive rows with the same nonzero structure, thereby
3653    reusing matrix information to achieve increased efficiency.
3654 
3655    Options Database Keys:
3656 +  -mat_no_inode  - Do not use inodes
3657 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3658 -  -mat_aij_oneindex - Internally use indexing starting at 1
3659         rather than 0.  Note that when calling MatSetValues(),
3660         the user still MUST index entries starting at 0!
3661 
3662 
3663    Example usage:
3664 
3665    Consider the following 8x8 matrix with 34 non-zero values, that is
3666    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3667    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3668    as follows:
3669 
3670 .vb
3671             1  2  0  |  0  3  0  |  0  4
3672     Proc0   0  5  6  |  7  0  0  |  8  0
3673             9  0 10  | 11  0  0  | 12  0
3674     -------------------------------------
3675            13  0 14  | 15 16 17  |  0  0
3676     Proc1   0 18  0  | 19 20 21  |  0  0
3677             0  0  0  | 22 23  0  | 24  0
3678     -------------------------------------
3679     Proc2  25 26 27  |  0  0 28  | 29  0
3680            30  0  0  | 31 32 33  |  0 34
3681 .ve
3682 
3683    This can be represented as a collection of submatrices as:
3684 
3685 .vb
3686       A B C
3687       D E F
3688       G H I
3689 .ve
3690 
3691    Where the submatrices A,B,C are owned by proc0, D,E,F are
3692    owned by proc1, G,H,I are owned by proc2.
3693 
3694    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3695    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3696    The 'M','N' parameters are 8,8, and have the same values on all procs.
3697 
3698    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3699    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3700    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3701    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3702    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3703    matrix, and [DF] as another SeqAIJ matrix.
3704 
3705    When d_nz, o_nz parameters are specified, d_nz storage elements are
3706    allocated for every row of the local diagonal submatrix, and o_nz
3707    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3708    One way to choose d_nz and o_nz is to use the max nonzeros per local
3709    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3710    In this case, the values of d_nz,o_nz are:
3711 .vb
3712      proc0 : d_nz = 2, o_nz = 2
3713      proc1 : d_nz = 3, o_nz = 2
3714      proc2 : d_nz = 1, o_nz = 4
3715 .ve
3716    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3717    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3718    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3719    34 values.
3720 
3721    When d_nnz, o_nnz parameters are specified, the storage is specified
3722    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3723    In the above case the values for d_nnz,o_nnz are:
3724 .vb
3725      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3726      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3727      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3728 .ve
3729    Here the space allocated is the sum of all the above values, i.e., 34, and
3730    hence the preallocation is perfect.
3731 
3732    Level: intermediate
3733 
3734 .keywords: matrix, aij, compressed row, sparse, parallel
3735 
3736 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3737           MPIAIJ, MatCreateMPIAIJWithArrays()
3738 @*/
3739 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3740 {
3741   PetscErrorCode ierr;
3742   PetscMPIInt    size;
3743 
3744   PetscFunctionBegin;
3745   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3746   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3747   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3748   if (size > 1) {
3749     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3750     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3751   } else {
3752     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3753     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3754   }
3755   PetscFunctionReturn(0);
3756 }
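/*
   Sketch of the MatCreate()/MatSetType()/MatXXXXSetPreallocation() paradigm recommended
   in the man page above (sizes and fill values are placeholders, not library defaults):

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/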
3757 
3758 #undef __FUNCT__
3759 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3760 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3761 {
3762   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3763 
3764   PetscFunctionBegin;
3765   *Ad     = a->A;
3766   *Ao     = a->B;
3767   *colmap = a->garray;
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3773 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3774 {
3775   PetscErrorCode ierr;
3776   PetscInt       i;
3777   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3778 
3779   PetscFunctionBegin;
3780   if (coloring->ctype == IS_COLORING_GLOBAL) {
3781     ISColoringValue *allcolors,*colors;
3782     ISColoring      ocoloring;
3783 
3784     /* set coloring for diagonal portion */
3785     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3786 
3787     /* set coloring for off-diagonal portion */
3788     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3789     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3790     for (i=0; i<a->B->cmap->n; i++) {
3791       colors[i] = allcolors[a->garray[i]];
3792     }
3793     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3794     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3795     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3796     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3797   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3798     ISColoringValue *colors;
3799     PetscInt        *larray;
3800     ISColoring      ocoloring;
3801 
3802     /* set coloring for diagonal portion */
3803     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3804     for (i=0; i<a->A->cmap->n; i++) {
3805       larray[i] = i + A->cmap->rstart;
3806     }
3807     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3808     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3809     for (i=0; i<a->A->cmap->n; i++) {
3810       colors[i] = coloring->colors[larray[i]];
3811     }
3812     ierr = PetscFree(larray);CHKERRQ(ierr);
3813     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3814     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3815     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3816 
3817     /* set coloring for off-diagonal portion */
3818     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3819     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3820     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3821     for (i=0; i<a->B->cmap->n; i++) {
3822       colors[i] = coloring->colors[larray[i]];
3823     }
3824     ierr = PetscFree(larray);CHKERRQ(ierr);
3825     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3826     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3827     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3828   } else {
3829     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3830   }
3831 
3832   PetscFunctionReturn(0);
3833 }
3834 
3835 #if defined(PETSC_HAVE_ADIC)
3836 #undef __FUNCT__
3837 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3838 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3839 {
3840   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3841   PetscErrorCode ierr;
3842 
3843   PetscFunctionBegin;
3844   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3845   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3846   PetscFunctionReturn(0);
3847 }
3848 #endif
3849 
3850 #undef __FUNCT__
3851 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3852 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3853 {
3854   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3855   PetscErrorCode ierr;
3856 
3857   PetscFunctionBegin;
3858   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3859   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3860   PetscFunctionReturn(0);
3861 }
3862 
3863 #undef __FUNCT__
3864 #define __FUNCT__ "MatMerge"
3865 /*@
3866       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3867                  matrices from each processor
3868 
3869     Collective on MPI_Comm
3870 
3871    Input Parameters:
3872 +    comm - the communicators the parallel matrix will live on
3873 .    inmat - the input sequential matrices
3874 .    n - number of local columns (or PETSC_DECIDE)
3875 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3876 
3877    Output Parameter:
3878 .    outmat - the parallel matrix generated
3879 
3880     Level: advanced
3881 
3882    Notes: The number of columns of the input matrix MUST be the same on EACH process.
3883 
3884 @*/
3885 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3886 {
3887   PetscErrorCode ierr;
3888   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3889   PetscInt       *indx;
3890   PetscScalar    *values;
3891 
3892   PetscFunctionBegin;
3893   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3894   if (scall == MAT_INITIAL_MATRIX){
3895     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3896     if (n == PETSC_DECIDE){
3897       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3898     }
3899     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3900     rstart -= m;
3901 
3902     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3903     for (i=0;i<m;i++) {
3904       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3905       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3906       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3907     }
3908     /* This routine will ONLY return MPIAIJ type matrix */
3909     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3910     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3911     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3912     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3913     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3914 
3915   } else if (scall == MAT_REUSE_MATRIX){
3916     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3917   } else {
3918     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3919   }
3920 
3921   for (i=0;i<m;i++) {
3922     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3923     Ii    = i + rstart;
3924     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3925     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3926   }
3927   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3928   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3929   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3930 
3931   PetscFunctionReturn(0);
3932 }
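/*
   Hedged usage sketch: each process supplies a sequential matrix holding its local rows
   (all with the same global column count); the input matrix is destroyed by the call.
   "seqA" is a placeholder for a SeqAIJ matrix built on PETSC_COMM_SELF:

      ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
*/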
3933 
3934 #undef __FUNCT__
3935 #define __FUNCT__ "MatFileSplit"
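/* Writes each process's local rows of A to its own sequential binary file named
   <outfile>.<rank>, by copying them into a temporary SeqAIJ matrix and calling MatView(). */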
3936 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3937 {
3938   PetscErrorCode    ierr;
3939   PetscMPIInt       rank;
3940   PetscInt          m,N,i,rstart,nnz;
3941   size_t            len;
3942   const PetscInt    *indx;
3943   PetscViewer       out;
3944   char              *name;
3945   Mat               B;
3946   const PetscScalar *values;
3947 
3948   PetscFunctionBegin;
3949   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3950   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3951   /* Should this be the type of the diagonal block of A? */
3952   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3953   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3954   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3955   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3956   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3957   for (i=0;i<m;i++) {
3958     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3959     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3960     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3961   }
3962   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964 
3965   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3966   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3967   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr);
3968   sprintf(name,"%s.%d",outfile,rank);
3969   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3970   ierr = PetscFree(name);CHKERRQ(ierr);
3971   ierr = MatView(B,out);CHKERRQ(ierr);
3972   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3973   ierr = MatDestroy(B);CHKERRQ(ierr);
3974   PetscFunctionReturn(0);
3975 }
3976 
3977 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3978 #undef __FUNCT__
3979 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3980 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3981 {
3982   PetscErrorCode       ierr;
3983   Mat_Merge_SeqsToMPI  *merge;
3984   PetscContainer       container;
3985 
3986   PetscFunctionBegin;
3987   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3988   if (container) {
3989     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4002     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4003 
4004     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4005     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4006     ierr = PetscFree(merge);CHKERRQ(ierr);
4007   }
4008 
4009   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
4012 
4013 #include "../src/mat/utils/freespace.h"
4014 #include "petscbt.h"
4015 
4016 #undef __FUNCT__
4017 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4018 /*@C
4019       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4020                  matrices from each processor
4021 
4022     Collective on MPI_Comm
4023 
4024    Input Parameters:
4025 +    comm - the communicators the parallel matrix will live on
4026 .    seqmat - the input sequential matrices
4027 .    m - number of local rows (or PETSC_DECIDE)
4028 .    n - number of local columns (or PETSC_DECIDE)
4029 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4030 
4031    Output Parameter:
4032 .    mpimat - the parallel matrix generated
4033 
4034     Level: advanced
4035 
4036    Notes:
4037      The dimensions of the sequential matrix on each process MUST be the same.
4038      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4039      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
          This manual page describes the driver interface MatMerge_SeqsToMPI(); its symbolic and
          numeric phases are implemented by MatMerge_SeqsToMPISymbolic() and MatMerge_SeqsToMPINumeric().
4040 @*/
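/*
   A minimal usage sketch (not part of the original source; names are illustrative).
   Each process is assumed to already hold a SeqAIJ matrix seqmat of identical global
   dimensions; the symbolic and numeric phases below are normally driven through
   MatMerge_SeqsToMPI():

     Mat mpimat;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(mpimat);CHKERRQ(ierr);
*/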
4041 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4042 {
4043   PetscErrorCode       ierr;
4044   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4045   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4046   PetscMPIInt          size,rank,taga,*len_s;
4047   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4048   PetscInt             proc,m;
4049   PetscInt             **buf_ri,**buf_rj;
4050   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4051   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4052   MPI_Request          *s_waits,*r_waits;
4053   MPI_Status           *status;
4054   MatScalar            *aa=a->a;
4055   MatScalar            **abuf_r,*ba_i;
4056   Mat_Merge_SeqsToMPI  *merge;
4057   PetscContainer       container;
4058 
4059   PetscFunctionBegin;
4060   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4061 
4062   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4063   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4064 
4065   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4066   if (container) {
4067     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4068   } else SETERRQ(PETSC_ERR_PLIB,"Matrix mpimat was not created by MatMerge_SeqsToMPISymbolic()");
4069   bi     = merge->bi;
4070   bj     = merge->bj;
4071   buf_ri = merge->buf_ri;
4072   buf_rj = merge->buf_rj;
4073 
4074   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4075   owners = merge->rowmap->range;
4076   len_s  = merge->len_s;
4077 
4078   /* send and recv matrix values */
4079   /*-----------------------------*/
4080   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4081   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4082 
4083   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4084   for (proc=0,k=0; proc<size; proc++){
4085     if (!len_s[proc]) continue;
4086     i = owners[proc];
4087     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4088     k++;
4089   }
4090 
4091   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4092   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4093   ierr = PetscFree(status);CHKERRQ(ierr);
4094 
4095   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4096   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4097 
4098   /* insert mat values of mpimat */
4099   /*----------------------------*/
4100   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4101   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4102 
4103   for (k=0; k<merge->nrecv; k++){
4104     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4105     nrows = *(buf_ri_k[k]);
4106     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4107     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4108   }
4109 
4110   /* set values of ba */
4111   m = merge->rowmap->n;
4112   for (i=0; i<m; i++) {
4113     arow = owners[rank] + i;
4114     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4115     bnzi = bi[i+1] - bi[i];
4116     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4117 
4118     /* add local non-zero vals of this proc's seqmat into ba */
4119     anzi = ai[arow+1] - ai[arow];
4120     aj   = a->j + ai[arow];
4121     aa   = a->a + ai[arow];
4122     nextaj = 0;
4123     for (j=0; nextaj<anzi; j++){
4124       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4125         ba_i[j] += aa[nextaj++];
4126       }
4127     }
4128 
4129     /* add received vals into ba */
4130     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4131       /* i-th row */
4132       if (i == *nextrow[k]) {
4133         anzi = *(nextai[k]+1) - *nextai[k];
4134         aj   = buf_rj[k] + *(nextai[k]);
4135         aa   = abuf_r[k] + *(nextai[k]);
4136         nextaj = 0;
4137         for (j=0; nextaj<anzi; j++){
4138           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4139             ba_i[j] += aa[nextaj++];
4140           }
4141         }
4142         nextrow[k]++; nextai[k]++;
4143       }
4144     }
4145     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4146   }
4147   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4148   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4149 
4150   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4151   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4152   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4153   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4154   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4155   PetscFunctionReturn(0);
4156 }
4157 
4158 #undef __FUNCT__
4159 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4160 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4161 {
4162   PetscErrorCode       ierr;
4163   Mat                  B_mpi;
4164   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4165   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4166   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4167   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4168   PetscInt             len,proc,*dnz,*onz;
4169   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4170   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4171   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4172   MPI_Status           *status;
4173   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4174   PetscBT              lnkbt;
4175   Mat_Merge_SeqsToMPI  *merge;
4176   PetscContainer       container;
4177 
4178   PetscFunctionBegin;
4179   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4180 
4181   /* make sure it is a PETSc comm */
4182   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4183   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4184   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4185 
4186   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4187   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4188 
4189   /* determine row ownership */
4190   /*---------------------------------------------------------*/
4191   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4192   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4193   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4194   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4195   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4196   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4197   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4198 
4199   m      = merge->rowmap->n;
4200   M      = merge->rowmap->N;
4201   owners = merge->rowmap->range;
4202 
4203   /* determine the number of messages to send, their lengths */
4204   /*---------------------------------------------------------*/
4205   len_s  = merge->len_s;
4206 
4207   len = 0;  /* length of buf_si[] */
4208   merge->nsend = 0;
4209   for (proc=0; proc<size; proc++){
4210     len_si[proc] = 0;
4211     if (proc == rank){
4212       len_s[proc] = 0;
4213     } else {
4214       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4215       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4216     }
4217     if (len_s[proc]) {
4218       merge->nsend++;
4219       nrows = 0;
4220       for (i=owners[proc]; i<owners[proc+1]; i++){
4221         if (ai[i+1] > ai[i]) nrows++;
4222       }
4223       len_si[proc] = 2*(nrows+1);
4224       len += len_si[proc];
4225     }
4226   }
4227 
4228   /* determine the number and length of messages to receive for ij-structure */
4229   /*-------------------------------------------------------------------------*/
4230   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4231   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4232 
4233   /* post the Irecv of j-structure */
4234   /*-------------------------------*/
4235   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4236   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4237 
4238   /* post the Isend of j-structure */
4239   /*--------------------------------*/
4240   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4241 
4242   for (proc=0, k=0; proc<size; proc++){
4243     if (!len_s[proc]) continue;
4244     i = owners[proc];
4245     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4246     k++;
4247   }
4248 
4249   /* receives and sends of j-structure are complete */
4250   /*------------------------------------------------*/
4251   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4252   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4253 
4254   /* send and recv i-structure */
4255   /*---------------------------*/
4256   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4257   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4258 
4259   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4260   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4261   for (proc=0,k=0; proc<size; proc++){
4262     if (!len_s[proc]) continue;
4263     /* form outgoing message for i-structure:
4264          buf_si[0]:                 nrows to be sent
4265                [1:nrows]:           row index (local to the destination process)
4266                [nrows+1:2*nrows+1]: i-structure index
4267     */
4268     /*-------------------------------------------*/
4269     nrows = len_si[proc]/2 - 1;
4270     buf_si_i    = buf_si + nrows+1;
4271     buf_si[0]   = nrows;
4272     buf_si_i[0] = 0;
4273     nrows = 0;
4274     for (i=owners[proc]; i<owners[proc+1]; i++){
4275       anzi = ai[i+1] - ai[i];
4276       if (anzi) {
4277         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4278         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4279         nrows++;
4280       }
4281     }
4282     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4283     k++;
4284     buf_si += len_si[proc];
4285   }
4286 
4287   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4288   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4289 
4290   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4291   for (i=0; i<merge->nrecv; i++){
4292     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4293   }
4294 
4295   ierr = PetscFree(len_si);CHKERRQ(ierr);
4296   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4297   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4298   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4299   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4300   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4301   ierr = PetscFree(status);CHKERRQ(ierr);
4302 
4303   /* compute a local seq matrix in each processor */
4304   /*----------------------------------------------*/
4305   /* allocate bi array and free space for accumulating nonzero column info */
4306   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4307   bi[0] = 0;
4308 
4309   /* create and initialize a linked list */
4310   nlnk = N+1;
4311   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4312 
4313   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4315   len  = ai[owners[rank+1]] - ai[owners[rank]];
4316   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4317   current_space = free_space;
4318 
4319   /* determine symbolic info for each local row */
4320   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4321 
4322   for (k=0; k<merge->nrecv; k++){
4323     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4324     nrows = *buf_ri_k[k];
4325     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4326     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4327   }
4328 
4329   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4330   len = 0;
4331   for (i=0;i<m;i++) {
4332     bnzi   = 0;
4333     /* add local non-zero cols of this proc's seqmat into lnk */
4334     arow   = owners[rank] + i;
4335     anzi   = ai[arow+1] - ai[arow];
4336     aj     = a->j + ai[arow];
4337     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4338     bnzi += nlnk;
4339     /* add received col data into lnk */
4340     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4341       if (i == *nextrow[k]) { /* i-th row */
4342         anzi = *(nextai[k]+1) - *nextai[k];
4343         aj   = buf_rj[k] + *nextai[k];
4344         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4345         bnzi += nlnk;
4346         nextrow[k]++; nextai[k]++;
4347       }
4348     }
4349     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4350 
4351     /* if free space is not available, make more free space */
4352     if (current_space->local_remaining<bnzi) {
4353       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4354       nspacedouble++;
4355     }
4356     /* copy data into free space, then initialize lnk */
4357     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4358     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4359 
4360     current_space->array           += bnzi;
4361     current_space->local_used      += bnzi;
4362     current_space->local_remaining -= bnzi;
4363 
4364     bi[i+1] = bi[i] + bnzi;
4365   }
4366 
4367   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4368 
4369   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4370   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4371   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4372 
4373   /* create symbolic parallel matrix B_mpi */
4374   /*---------------------------------------*/
4375   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4376   if (n==PETSC_DECIDE) {
4377     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4378   } else {
4379     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4380   }
4381   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4382   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4383   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4384 
4385   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4386   B_mpi->assembled     = PETSC_FALSE;
4387   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4388   merge->bi            = bi;
4389   merge->bj            = bj;
4390   merge->buf_ri        = buf_ri;
4391   merge->buf_rj        = buf_rj;
4392   merge->coi           = PETSC_NULL;
4393   merge->coj           = PETSC_NULL;
4394   merge->owners_co     = PETSC_NULL;
4395 
4396   /* attach the supporting struct to B_mpi for reuse */
4397   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4398   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4399   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4400   *mpimat = B_mpi;
4401 
4402   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4403   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4404   PetscFunctionReturn(0);
4405 }
4406 
4407 #undef __FUNCT__
4408 #define __FUNCT__ "MatMerge_SeqsToMPI"
4409 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4410 {
4411   PetscErrorCode   ierr;
4412 
4413   PetscFunctionBegin;
4414   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4415   if (scall == MAT_INITIAL_MATRIX){
4416     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4417   }
4418   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4419   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4420   PetscFunctionReturn(0);
4421 }
4422 
4423 #undef __FUNCT__
4424 #define __FUNCT__ "MatGetLocalMat"
4425 /*@
4426      MatGetLocalMat - Creates a SeqAIJ matrix from the local rows of the matrix
4427 
4428     Not Collective
4429 
4430    Input Parameters:
4431 +    A - the matrix
4432 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4433 
4434    Output Parameter:
4435 .    A_loc - the local sequential matrix generated
4436 
4437     Level: developer
4438 
4439 @*/
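/*
   A hedged usage sketch (names are illustrative; A is assumed to be MPIAIJ).
   The MAT_INITIAL_MATRIX call builds the local matrix; later calls with
   MAT_REUSE_MATRIX refresh only the numerical values:

     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ... change numerical values of A, same nonzero pattern ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/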
4440 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4441 {
4442   PetscErrorCode  ierr;
4443   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4444   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4445   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4446   MatScalar       *aa=a->a,*ba=b->a,*cam;
4447   PetscScalar     *ca;
4448   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4449   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4450 
4451   PetscFunctionBegin;
4452   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4453   if (scall == MAT_INITIAL_MATRIX){
4454     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4455     ci[0] = 0;
4456     for (i=0; i<am; i++){
4457       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4458     }
4459     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4460     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4461     k = 0;
4462     for (i=0; i<am; i++) {
4463       ncols_o = bi[i+1] - bi[i];
4464       ncols_d = ai[i+1] - ai[i];
4465       /* off-diagonal portion of A */
4466       for (jo=0; jo<ncols_o; jo++) {
4467         col = cmap[*bj];
4468         if (col >= cstart) break;
4469         cj[k]   = col; bj++;
4470         ca[k++] = *ba++;
4471       }
4472       /* diagonal portion of A */
4473       for (j=0; j<ncols_d; j++) {
4474         cj[k]   = cstart + *aj++;
4475         ca[k++] = *aa++;
4476       }
4477       /* off-diagonal portion of A */
4478       for (j=jo; j<ncols_o; j++) {
4479         cj[k]   = cmap[*bj++];
4480         ca[k++] = *ba++;
4481       }
4482     }
4483     /* put together the new matrix */
4484     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4485     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4486     /* Since these are PETSc arrays, change flags to free them as necessary. */
4487     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4488     mat->free_a  = PETSC_TRUE;
4489     mat->free_ij = PETSC_TRUE;
4490     mat->nonew   = 0;
4491   } else if (scall == MAT_REUSE_MATRIX){
4492     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4493     ci = mat->i; cj = mat->j; cam = mat->a;
4494     for (i=0; i<am; i++) {
4495       /* off-diagonal portion of A */
4496       ncols_o = bi[i+1] - bi[i];
4497       for (jo=0; jo<ncols_o; jo++) {
4498         col = cmap[*bj];
4499         if (col >= cstart) break;
4500         *cam++ = *ba++; bj++;
4501       }
4502       /* diagonal portion of A */
4503       ncols_d = ai[i+1] - ai[i];
4504       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4505       /* off-diagonal portion of A */
4506       for (j=jo; j<ncols_o; j++) {
4507         *cam++ = *ba++; bj++;
4508       }
4509     }
4510   } else {
4511     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4512   }
4513 
4514   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4515   PetscFunctionReturn(0);
4516 }
4517 
4518 #undef __FUNCT__
4519 #define __FUNCT__ "MatGetLocalMatCondensed"
4520 /*@C
4521      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of the matrix
4522 
4523     Not Collective
4524 
4525    Input Parameters:
4526 +    A - the matrix
4527 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4528 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4529 
4530    Output Parameter:
4531 .    A_loc - the local sequential matrix generated
4532 
4533     Level: developer
4534 
4535 @*/
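/*
   Usage sketch (illustrative only; A is assumed to be MPIAIJ). Passing PETSC_NULL
   for row and col lets the routine build the index sets from A's row ownership and
   its nonzero off-diagonal columns:

     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/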
4536 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4537 {
4538   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4539   PetscErrorCode    ierr;
4540   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4541   IS                isrowa,iscola;
4542   Mat               *aloc;
4543 
4544   PetscFunctionBegin;
4545   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4546   if (!row){
4547     start = A->rmap->rstart; end = A->rmap->rend;
4548     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4549   } else {
4550     isrowa = *row;
4551   }
4552   if (!col){
4553     start = A->cmap->rstart;
4554     cmap  = a->garray;
4555     nzA   = a->A->cmap->n;
4556     nzB   = a->B->cmap->n;
4557     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4558     ncols = 0;
4559     for (i=0; i<nzB; i++) {
4560       if (cmap[i] < start) idx[ncols++] = cmap[i];
4561       else break;
4562     }
4563     imark = i;
4564     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4565     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4566     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4567     ierr = PetscFree(idx);CHKERRQ(ierr);
4568   } else {
4569     iscola = *col;
4570   }
4571   if (scall != MAT_INITIAL_MATRIX){
4572     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4573     aloc[0] = *A_loc;
4574   }
4575   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4576   *A_loc = aloc[0];
4577   ierr = PetscFree(aloc);CHKERRQ(ierr);
4578   if (!row){
4579     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4580   }
4581   if (!col){
4582     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4583   }
4584   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4585   PetscFunctionReturn(0);
4586 }
4587 
4588 #undef __FUNCT__
4589 #define __FUNCT__ "MatGetBrowsOfAcols"
4590 /*@C
4591     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4592 
4593     Collective on Mat
4594 
4595    Input Parameters:
4596 +    A,B - the matrices in mpiaij format
4597 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4598 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4599 
4600    Output Parameter:
4601 +    rowb, colb - index sets of rows and columns of B to extract
4602 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq come from B's local rows)
4603 -    B_seq - the sequential matrix generated
4604 
4605     Level: developer
4606 
4607 @*/
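/*
   Usage sketch (illustrative; A and B are assumed to be MPIAIJ with compatible
   layouts). The MAT_INITIAL_MATRIX call creates rowb and colb, which must be
   passed back unchanged for MAT_REUSE_MATRIX:

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... change numerical values of B, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/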
4608 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4609 {
4610   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4611   PetscErrorCode    ierr;
4612   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4613   IS                isrowb,iscolb;
4614   Mat               *bseq;
4615 
4616   PetscFunctionBegin;
4617   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4618     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4619   }
4620   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4621 
4622   if (scall == MAT_INITIAL_MATRIX){
4623     start = A->cmap->rstart;
4624     cmap  = a->garray;
4625     nzA   = a->A->cmap->n;
4626     nzB   = a->B->cmap->n;
4627     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4628     ncols = 0;
4629     for (i=0; i<nzB; i++) {  /* row < local row index */
4630       if (cmap[i] < start) idx[ncols++] = cmap[i];
4631       else break;
4632     }
4633     imark = i;
4634     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4635     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4636     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4637     ierr = PetscFree(idx);CHKERRQ(ierr);
4638     *brstart = imark;
4639     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4640   } else {
4641     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4642     isrowb = *rowb; iscolb = *colb;
4643     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4644     bseq[0] = *B_seq;
4645   }
4646   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4647   *B_seq = bseq[0];
4648   ierr = PetscFree(bseq);CHKERRQ(ierr);
4649   if (!rowb){
4650     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4651   } else {
4652     *rowb = isrowb;
4653   }
4654   if (!colb){
4655     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4656   } else {
4657     *colb = iscolb;
4658   }
4659   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4660   PetscFunctionReturn(0);
4661 }
4662 
4663 #undef __FUNCT__
4664 #define __FUNCT__ "MatGetBrowsOfAoCols"
4665 /*@C
4666     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4667     of the OFF-DIAGONAL portion of the local part of A
4668 
4669     Collective on Mat
4670 
4671    Input Parameters:
4672 +    A,B - the matrices in mpiaij format
4673 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4674 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4675 .    startsj_r - similar to startsj for receives
4676 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4677 
4678    Output Parameter:
4679 .    B_oth - the sequential matrix generated
4680 
4681     Level: developer
4682 
4683 @*/
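/*
   Usage sketch (illustrative; A and B are assumed to be MPIAIJ with compatible
   layouts). The buffers returned through startsj, startsj_r and bufa_ptr by the
   MAT_INITIAL_MATRIX call are what make the MAT_REUSE_MATRIX call possible:

     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... change numerical values of B, same nonzero pattern ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/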
4684 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4685 {
4686   VecScatter_MPI_General *gen_to,*gen_from;
4687   PetscErrorCode         ierr;
4688   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4689   Mat_SeqAIJ             *b_oth;
4690   VecScatter             ctx=a->Mvctx;
4691   MPI_Comm               comm=((PetscObject)ctx)->comm;
4692   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4693   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4694   PetscScalar            *rvalues,*svalues;
4695   MatScalar              *b_otha,*bufa,*bufA;
4696   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4697   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4698   MPI_Status             *sstatus,rstatus;
4699   PetscMPIInt            jj;
4700   PetscInt               *cols,sbs,rbs;
4701   PetscScalar            *vals;
4702 
4703   PetscFunctionBegin;
4704   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4705     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4706   }
4707   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4709 
4710   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4711   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4712   rvalues  = gen_from->values; /* scatter buffer, reused here to hold the row lengths being received */
4713   svalues  = gen_to->values;   /* scatter buffer, reused here to hold the row lengths being sent */
4714   nrecvs   = gen_from->n;
4715   nsends   = gen_to->n;
4716 
4717   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4718   srow     = gen_to->indices;   /* local row index to be sent */
4719   sstarts  = gen_to->starts;
4720   sprocs   = gen_to->procs;
4721   sstatus  = gen_to->sstatus;
4722   sbs      = gen_to->bs;
4723   rstarts  = gen_from->starts;
4724   rprocs   = gen_from->procs;
4725   rbs      = gen_from->bs;
4726 
4727   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4728   if (scall == MAT_INITIAL_MATRIX){
4729     /* i-array */
4730     /*---------*/
4731     /*  post receives */
4732     for (i=0; i<nrecvs; i++){
4733       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4734       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4735       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4736     }
4737 
4738     /* pack the outgoing message */
4739     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4740     sstartsj[0] = 0;  rstartsj[0] = 0;
4741     len = 0; /* total length of j or a array to be sent */
4742     k = 0;
4743     for (i=0; i<nsends; i++){
4744       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4745       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4746       for (j=0; j<nrows; j++) {
4747         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4748         for (l=0; l<sbs; l++){
4749           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4750           rowlen[j*sbs+l] = ncols;
4751           len += ncols;
4752           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4753         }
4754         k++;
4755       }
4756       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4757       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4758     }
4759     /* recvs and sends of i-array are completed */
4760     i = nrecvs;
4761     while (i--) {
4762       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4763     }
4764     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4765 
4766     /* allocate buffers for sending j and a arrays */
4767     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4768     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4769 
4770     /* create i-array of B_oth */
4771     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4772     b_othi[0] = 0;
4773     len = 0; /* total length of j or a array to be received */
4774     k = 0;
4775     for (i=0; i<nrecvs; i++){
4776       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4777       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4778       for (j=0; j<nrows; j++) {
4779         b_othi[k+1] = b_othi[k] + rowlen[j];
4780         len += rowlen[j]; k++;
4781       }
4782       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4783     }
4784 
4785     /* allocate space for j and a arrays of B_oth */
4786     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4787     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4788 
4789     /* j-array */
4790     /*---------*/
4791     /*  post receives of j-array */
4792     for (i=0; i<nrecvs; i++){
4793       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4794       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4795     }
4796 
4797     /* pack the outgoing message j-array */
4798     k = 0;
4799     for (i=0; i<nsends; i++){
4800       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4801       bufJ = bufj+sstartsj[i];
4802       for (j=0; j<nrows; j++) {
4803         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4804         for (ll=0; ll<sbs; ll++){
4805           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4806           for (l=0; l<ncols; l++){
4807             *bufJ++ = cols[l];
4808           }
4809           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4810         }
4811       }
4812       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4813     }
4814 
4815     /* recvs and sends of j-array are completed */
4816     i = nrecvs;
4817     while (i--) {
4818       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4819     }
4820     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4821   } else if (scall == MAT_REUSE_MATRIX){
4822     sstartsj = *startsj;
4823     rstartsj = *startsj_r;
4824     bufa     = *bufa_ptr;
4825     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4826     b_otha   = b_oth->a;
4827   } else {
4828     SETERRQ(PETSC_ERR_ARG_WRONG,"Invalid MatReuse: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
4829   }
4830 
4831   /* a-array */
4832   /*---------*/
4833   /*  post receives of a-array */
4834   for (i=0; i<nrecvs; i++){
4835     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4836     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4837   }
4838 
4839   /* pack the outgoing message a-array */
4840   k = 0;
4841   for (i=0; i<nsends; i++){
4842     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4843     bufA = bufa+sstartsj[i];
4844     for (j=0; j<nrows; j++) {
4845       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4846       for (ll=0; ll<sbs; ll++){
4847         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4848         for (l=0; l<ncols; l++){
4849           *bufA++ = vals[l];
4850         }
4851         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4852       }
4853     }
4854     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4855   }
4856   /* recvs and sends of a-array are completed */
4857   i = nrecvs;
4858   while (i--) {
4859     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4860   }
4861   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4862   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4863 
4864   if (scall == MAT_INITIAL_MATRIX){
4865     /* put together the new matrix */
4866     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4867 
4868     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4869     /* Since these are PETSc arrays, change flags to free them as necessary. */
4870     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4871     b_oth->free_a  = PETSC_TRUE;
4872     b_oth->free_ij = PETSC_TRUE;
4873     b_oth->nonew   = 0;
4874 
4875     ierr = PetscFree(bufj);CHKERRQ(ierr);
4876     if (!startsj || !bufa_ptr){
4877       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4878       ierr = PetscFree(bufa);CHKERRQ(ierr);
4879     } else {
4880       *startsj   = sstartsj;
4881       *startsj_r = rstartsj;
4882       *bufa_ptr  = bufa;
4883     }
4884   }
4885   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4886   PetscFunctionReturn(0);
4887 }
4888 
4889 #undef __FUNCT__
4890 #define __FUNCT__ "MatGetCommunicationStructs"
4891 /*@C
4892   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4893 
4894   Not Collective
4895 
4896   Input Parameters:
4897 . A - The matrix in mpiaij format
4898 
4899   Output Parameter:
4900 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4901 . colmap - A map from global column index to local index into lvec
4902 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4903 
4904   Level: developer
4905 
4906 @*/
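/*
   Access sketch (illustrative; A is assumed to be of type MATMPIAIJ). All three
   output pointers must be valid addresses; colmap is a PetscTable* when PETSc is
   configured with PETSC_USE_CTABLE and a PetscInt** otherwise:

     Vec        lvec;
     VecScatter Mvctx;
     PetscInt   *colmap;                (PetscTable colmap; with PETSC_USE_CTABLE)
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/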
4907 #if defined (PETSC_USE_CTABLE)
4908 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4909 #else
4910 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4911 #endif
4912 {
4913   Mat_MPIAIJ *a;
4914 
4915   PetscFunctionBegin;
4916   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4917   PetscValidPointer(lvec, 2);
4918   PetscValidPointer(colmap, 3);
4919   PetscValidPointer(multScatter, 4);
4920   a = (Mat_MPIAIJ *) A->data;
4921   if (lvec) *lvec = a->lvec;
4922   if (colmap) *colmap = a->colmap;
4923   if (multScatter) *multScatter = a->Mvctx;
4924   PetscFunctionReturn(0);
4925 }
4926 
4927 EXTERN_C_BEGIN
4928 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4931 EXTERN_C_END
4932 
4933 #undef __FUNCT__
4934 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4935 /*
4936     Computes C = A*B as (B'*A')' since forming the MPIDense*MPIAIJ product directly is not supported
4937 
4938                n                       p                          p
4939         (              )       (              )         (                  )
4940       m (      A       )  *  n (       B      )   =   m (         C        )
4941         (              )       (              )         (                  )
4942 
4943 */
4944 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4945 {
4946   PetscErrorCode     ierr;
4947   Mat                At,Bt,Ct;
4948 
4949   PetscFunctionBegin;
4950   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4951   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4952   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4953   ierr = MatDestroy(At);CHKERRQ(ierr);
4954   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4955   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4956   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4957   PetscFunctionReturn(0);
4958 }
4959 
4960 #undef __FUNCT__
4961 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4962 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4963 {
4964   PetscErrorCode ierr;
4965   PetscInt       m=A->rmap->n,n=B->cmap->n;
4966   Mat            Cmat;
4967 
4968   PetscFunctionBegin;
4969   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4970   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4971   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4972   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4973   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4974   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4975   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4976   *C   = Cmat;
4977   PetscFunctionReturn(0);
4978 }
4979 
4980 /* ----------------------------------------------------------------*/
4981 #undef __FUNCT__
4982 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4983 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4984 {
4985   PetscErrorCode ierr;
4986 
4987   PetscFunctionBegin;
4988   if (scall == MAT_INITIAL_MATRIX){
4989     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4990   }
4991   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 EXTERN_C_BEGIN
4996 #if defined(PETSC_HAVE_MUMPS)
4997 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4998 #endif
4999 #if defined(PETSC_HAVE_PASTIX)
5000 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5001 #endif
5002 #if defined(PETSC_HAVE_SUPERLU_DIST)
5003 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5004 #endif
5005 #if defined(PETSC_HAVE_SPOOLES)
5006 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5007 #endif
5008 EXTERN_C_END
5009 
5010 /*MC
5011    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5012 
5013    Options Database Keys:
5014 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5015 
5016   Level: beginner
5017 
5018 .seealso: MatCreateMPIAIJ()
5019 M*/
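/*
   Runtime selection sketch (illustrative; names and preallocation numbers are
   assumptions, not part of the original source). The type can be set in code or
   from the options database:

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);       (or MatSetFromOptions(A) with -mat_type mpiaij)
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/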
5020 
5021 EXTERN_C_BEGIN
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatCreate_MPIAIJ"
5024 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5025 {
5026   Mat_MPIAIJ     *b;
5027   PetscErrorCode ierr;
5028   PetscMPIInt    size;
5029 
5030   PetscFunctionBegin;
5031   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5032 
5033   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5034   B->data         = (void*)b;
5035   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5036   B->rmap->bs     = 1;
5037   B->assembled    = PETSC_FALSE;
5038   B->mapping      = 0;
5039 
5040   B->insertmode   = NOT_SET_VALUES;
5041   b->size         = size;
5042   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5043 
5044   /* build cache for off array entries formed */
5045   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5046   b->donotstash  = PETSC_FALSE;
5047   b->colmap      = 0;
5048   b->garray      = 0;
5049   b->roworiented = PETSC_TRUE;
5050 
5051   /* stuff used for matrix vector multiply */
5052   b->lvec      = PETSC_NULL;
5053   b->Mvctx     = PETSC_NULL;
5054 
5055   /* stuff for MatGetRow() */
5056   b->rowindices   = 0;
5057   b->rowvalues    = 0;
5058   b->getrowactive = PETSC_FALSE;
5059 
5060 #if defined(PETSC_HAVE_SPOOLES)
5061   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5062                                      "MatGetFactor_mpiaij_spooles",
5063                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5064 #endif
5065 #if defined(PETSC_HAVE_MUMPS)
5066   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5067                                      "MatGetFactor_mpiaij_mumps",
5068                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5069 #endif
5070 #if defined(PETSC_HAVE_PASTIX)
5071   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5072 					   "MatGetFactor_mpiaij_pastix",
5073 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5074 #endif
5075 #if defined(PETSC_HAVE_SUPERLU_DIST)
5076   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5077                                      "MatGetFactor_mpiaij_superlu_dist",
5078                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5079 #endif
5080   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5081                                      "MatStoreValues_MPIAIJ",
5082                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5083   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5084                                      "MatRetrieveValues_MPIAIJ",
5085                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5086   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5087 				     "MatGetDiagonalBlock_MPIAIJ",
5088                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5090 				     "MatIsTranspose_MPIAIJ",
5091 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5092   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5093 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5094 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5096 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5097 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5098   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5099 				     "MatDiagonalScaleLocal_MPIAIJ",
5100 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5101   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5102                                      "MatConvert_MPIAIJ_MPICSRPERM",
5103                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5105                                      "MatConvert_MPIAIJ_MPICRL",
5106                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5108                                      "MatConvert_MPIAIJ_MPISBAIJ",
5109                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5110   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5111                                      "MatMatMult_MPIDense_MPIAIJ",
5112                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5113   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5114                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5115                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5116   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5117                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5118                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5119   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5120   PetscFunctionReturn(0);
5121 }
5122 EXTERN_C_END
5123 
5124 #undef __FUNCT__
5125 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5126 /*@
5127      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5128          and "off-diagonal" part of the matrix in CSR format.
5129 
5130    Collective on MPI_Comm
5131 
5132    Input Parameters:
5133 +  comm - MPI communicator
5134 .  m - number of local rows (Cannot be PETSC_DECIDE)
5135 .  n - This value should be the same as the local size used in creating the
5136        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5137        it calculated if N is given). For square matrices n is almost always m.
5138 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5139 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5140 .   i - row indices for "diagonal" portion of matrix
5141 .   j - column indices
5142 .   a - matrix values
5143 .   oi - row indices for "off-diagonal" portion of matrix
5144 .   oj - column indices
5145 -   oa - matrix values
5146 
5147    Output Parameter:
5148 .   mat - the matrix
5149 
5150    Level: advanced
5151 
5152    Notes:
5153        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5154 
5155        The i and j indices are 0 based
5156 
5157        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5158 
5159        This sets local rows and cannot be used to set off-processor values.
5160 
5161        You cannot later use MatSetValues() to change values in this matrix.
5162 
5163 .keywords: matrix, aij, compressed row, sparse, parallel
5164 
5165 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5166           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5167 @*/
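/*
   Illustrative sketch of the split-array input (assumed data, not from the original
   source): rank 0 of a two-process run with a 4x4 matrix, 2 local rows and 2 local
   columns per process; rank 1 supplies analogous arrays. The diagonal block uses
   local column indices, the off-diagonal block uses global column indices, and the
   arrays must outlive the matrix since they are not copied:

     PetscInt    di[] = {0,1,2}, dj[] = {0,1};   2x2 diagonal block, one entry per row
     PetscScalar da[] = {1.0,2.0};
     PetscInt    oi[] = {0,1,1}, oj[] = {3};     one off-diagonal entry in row 0, global column 3
     PetscScalar oa[] = {5.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
*/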
5168 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5169 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5170 {
5171   PetscErrorCode ierr;
5172   Mat_MPIAIJ     *maij;
5173 
5174   PetscFunctionBegin;
5175   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5176   if (i[0]) {
5177     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5178   }
5179   if (oi[0]) {
5180     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5181   }
5182   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5183   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5184   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5185   maij = (Mat_MPIAIJ*) (*mat)->data;
5186   maij->donotstash     = PETSC_TRUE;
5187   (*mat)->preallocated = PETSC_TRUE;
5188 
5189   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5190   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5191   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5192   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5193 
5194   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5195   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5196 
5197   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5201 
5202   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   PetscFunctionReturn(0);
5205 }
5206 
5207 /*
5208     Special version for direct calls from Fortran
5209 */
5210 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5211 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5212 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5213 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5214 #endif
5215 
5216 /* Change these macros so can be used in void function */
5217 #undef CHKERRQ
5218 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5219 #undef SETERRQ2
5220 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5221 #undef SETERRQ
5222 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5223 
5224 EXTERN_C_BEGIN
5225 #undef __FUNCT__
5226 #define __FUNCT__ "matsetvaluesmpiaij_"
5227 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5228 {
5229   Mat             mat = *mmat;
5230   PetscInt        m = *mm, n = *mn;
5231   InsertMode      addv = *maddv;
5232   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5233   PetscScalar     value;
5234   PetscErrorCode  ierr;
5235 
5236   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5237   if (mat->insertmode == NOT_SET_VALUES) {
5238     mat->insertmode = addv;
5239   }
5240 #if defined(PETSC_USE_DEBUG)
5241   else if (mat->insertmode != addv) {
5242     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5243   }
5244 #endif
5245   {
5246   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5247   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5248   PetscTruth      roworiented = aij->roworiented;
5249 
5250   /* Some Variables required in the macro */
5251   Mat             A = aij->A;
5252   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5253   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5254   MatScalar       *aa = a->a;
5255   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5256   Mat             B = aij->B;
5257   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5258   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5259   MatScalar       *ba = b->a;
5260 
5261   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5262   PetscInt        nonew = a->nonew;
5263   MatScalar       *ap1,*ap2;
5264 
5265   PetscFunctionBegin;
5266   for (i=0; i<m; i++) {
5267     if (im[i] < 0) continue;
5268 #if defined(PETSC_USE_DEBUG)
5269     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5270 #endif
5271     if (im[i] >= rstart && im[i] < rend) {
5272       row      = im[i] - rstart;
5273       lastcol1 = -1;
5274       rp1      = aj + ai[row];
5275       ap1      = aa + ai[row];
5276       rmax1    = aimax[row];
5277       nrow1    = ailen[row];
5278       low1     = 0;
5279       high1    = nrow1;
5280       lastcol2 = -1;
5281       rp2      = bj + bi[row];
5282       ap2      = ba + bi[row];
5283       rmax2    = bimax[row];
5284       nrow2    = bilen[row];
5285       low2     = 0;
5286       high2    = nrow2;
5287 
5288       for (j=0; j<n; j++) {
5289         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5290         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5291         if (in[j] >= cstart && in[j] < cend){
5292           col = in[j] - cstart;
5293           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5294         } else if (in[j] < 0) continue;
5295 #if defined(PETSC_USE_DEBUG)
5296         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5297 #endif
5298         else {
5299           if (mat->was_assembled) {
5300             if (!aij->colmap) {
5301               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5302             }
5303 #if defined (PETSC_USE_CTABLE)
5304             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5305 	    col--;
5306 #else
5307             col = aij->colmap[in[j]] - 1;
5308 #endif
5309             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5310               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5311               col =  in[j];
5312               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5313               B = aij->B;
5314               b = (Mat_SeqAIJ*)B->data;
5315               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5316               rp2      = bj + bi[row];
5317               ap2      = ba + bi[row];
5318               rmax2    = bimax[row];
5319               nrow2    = bilen[row];
5320               low2     = 0;
5321               high2    = nrow2;
5322               bm       = aij->B->rmap->n;
5323               ba = b->a;
5324             }
5325           } else col = in[j];
5326           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5327         }
5328       }
5329     } else {
5330       if (!aij->donotstash) {
5331         if (roworiented) {
5332           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5333         } else {
5334           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5335         }
5336       }
5337     }
5338   }}
5339   PetscFunctionReturnVoid();
5340 }
5341 EXTERN_C_END
5342 
5343